The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.

Possible dependencies:
8631ef59b622 ("KVM: x86/pmu: Do not speculatively query Intel GP PMCs that don't exist yet")
902caeb6841a ("KVM: x86/pmu: Add PEBS_DATA_CFG MSR emulation to support adaptive PEBS")
8183a538cd95 ("KVM: x86/pmu: Add IA32_DS_AREA MSR emulation to support guest DS")
6ebe44366bde ("KVM: x86/pmu: Adjust precise_ip to emulate Ice Lake guest PDIR counter")
79f3e3b58386 ("KVM: x86/pmu: Reprogram PEBS event to emulate guest PEBS counter")
c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS")
0d23dc34a7ce ("x86/perf/core: Add pebs_capable to store valid PEBS_COUNTER_MASK value")
2c985527dd8d ("KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter")
39a4d779546a ("perf/x86/core: Pass "struct kvm_pmu *" to determine the guest values")
38904911e864 ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm")

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

From 8631ef59b62290c7d88e7209e35dfb47f33f4902 Mon Sep 17 00:00:00 2001
From: Like Xu <likexu@tencent.com>
Date: Mon, 19 Sep 2022 17:10:06 +0800
Subject: [PATCH] KVM: x86/pmu: Do not speculatively query Intel GP PMCs that
 don't exist yet

The SDM lists an architectural MSR IA32_CORE_CAPABILITIES (0xCF)
that limits the theoretical maximum value of the Intel GP PMC MSRs
allocated at 0xC1 to 14; likewise the Intel April 2022 SDM adds
IA32_OVERCLOCKING_STATUS at 0x195, which limits the number of event
selection MSRs to 15 (0x186-0x194).

Limiting the maximum number of counters to 14 or 18 based on the
currently allocated MSRs is clearly fragile, and it seems likely that
Intel will even place PMCs 8-15 at a completely different range of MSR
indices.  So stop at the maximum number of GP PMCs supported today on
Intel processors.

There are some machines, like the Intel P4 with a non-architectural PMU,
that may indeed have 18 counters, but those counters are in a completely
different MSR address range and are not supported by KVM.

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: stable@vger.kernel.org
Fixes: cf05a67b68b8 ("KVM: x86: omit "impossible" pmu MSRs from MSR list")
Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20220919091008.60695-1-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5f5eb577d583..73716fab120f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1442,20 +1442,10 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
- MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
- MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
- MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
- MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
- MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
- MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
- MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
- MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
- MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
- MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
@@ -7041,12 +7031,12 @@ static void kvm_init_msr_list(void)
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
+ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 7:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
+ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 7:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
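For anyone preparing the backport by hand, below is a minimal,
self-contained C sketch of the filtering logic the kvm_init_msr_list()
hunk relies on: a perfmon MSR stays in the exported list only if its
offset from the base is below min(INTEL_PMC_MAX_GENERIC,
num_counters_gp).  The MSR bases come from the commit message; the
INTEL_PMC_MAX_GENERIC value and the num_counters_gp stub are assumptions
for illustration, and only the arithmetic mirrors the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_ARCH_PERFMON_PERFCTR0	0x0c1	/* base cited in the commit message */
#define MSR_ARCH_PERFMON_EVENTSEL0	0x186	/* base cited in the commit message */
#define INTEL_PMC_MAX_GENERIC		32	/* assumed, per <asm/perf_event.h> */

static unsigned int num_counters_gp = 4;	/* pretend CPUID reported 4 GP PMCs */

static bool keep_perfmon_msr(uint32_t msr, uint32_t base)
{
	unsigned int limit = INTEL_PMC_MAX_GENERIC < num_counters_gp ?
			     INTEL_PMC_MAX_GENERIC : num_counters_gp;

	/* Mirrors: msrs_to_save_all[i] - base >= min(...)  =>  skip the MSR */
	return msr - base < limit;
}

int main(void)
{
	for (uint32_t i = 0; i < 8; i++)
		printf("PERFCTR0+%u (0x%x): %s\n", i,
		       MSR_ARCH_PERFMON_PERFCTR0 + i,
		       keep_perfmon_msr(MSR_ARCH_PERFMON_PERFCTR0 + i,
					MSR_ARCH_PERFMON_PERFCTR0) ?
		       "saved" : "skipped");
	return 0;
}

With the stubbed values, offsets 0-3 are kept and 4-7 are skipped, which
is exactly the behavior the tightened "... + 7" case ranges enforce.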
The same patch also fails to apply to the 5.10-stable tree, with the
same possible dependencies as listed above.  If someone wants it applied
there, or to any other stable or longterm tree, then please email the
backport, including the original git commit id, to
<stable@vger.kernel.org>.

thanks,

greg k-h
Likewise, the patch does not apply to the 5.15-stable tree, again with
the same possible dependencies; backports should be emailed with the
original git commit id to <stable@vger.kernel.org>.

thanks,

greg k-h
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.

Possible dependencies:
6d3085e4d89a ("KVM: x86/mmu: Block all page faults during kvm_zap_gfn_range()")
20ec3ebd707c ("KVM: Rename mmu_notifier_* to mmu_invalidate_*")
65e3b446bcce ("KVM: x86/mmu: Document the "rules" for using host_pfn_mapping_level()")
a8ac499bb6ab ("KVM: x86/mmu: Don't require refcounted "struct page" to create huge SPTEs")
9202aee816c8 ("KVM: x86/mmu: Rename pte_list_{destroy,remove}() to show they zap SPTEs")
a42989e7fbb0 ("KVM: x86/mmu: Directly "destroy" PTE list when recycling rmaps")
2ff9039a75a8 ("KVM: x86/mmu: Decouple rmap_add() and link_shadow_page() from kvm_vcpu")
6ec6509eea39 ("KVM: x86/mmu: Pass const memslot to rmap_add()")
5d49f08c2e08 ("KVM: x86/mmu: Shove refcounted page dependency into host_pfn_mapping_level()")
b14b2690c50e ("KVM: Rename/refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()")
284dc4930773 ("KVM: Take a 'struct page', not a pfn in kvm_is_zone_device_page()")
b1624f99aa8f ("KVM: Remove kvm_vcpu_gfn_to_page() and kvm_vcpu_gpa_to_page()")
6573a6910ce4 ("KVM: Don't WARN if kvm_pfn_to_page() encounters a "reserved" pfn")
8e1c69149f27 ("KVM: Avoid pfn_to_page() and vice versa when releasing pages")
a1040b0d42ac ("KVM: Don't set Accessed/Dirty bits for ZERO_PAGE")
b31455e96f00 ("Merge branch 'kvm-5.20-early-patches' into HEAD")

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

From 6d3085e4d89ad7e6c7f1c6cf929d903393565861 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 11 Nov 2022 00:18:41 +0000
Subject: [PATCH] KVM: x86/mmu: Block all page faults during
 kvm_zap_gfn_range()

When zapping a GFN range, pass 0 => ALL_ONES for the to-be-invalidated
range to effectively block all page faults while the zap is in progress.
The invalidation helpers take a host virtual address, whereas zapping a
GFN obviously provides a guest physical address, and in the wrong unit
of measurement (frame vs. byte).

Alternatively, KVM could walk all memslots to get the associated HVAs,
but thanks to SMM, that would require multiple lookups.  And practically
speaking, kvm_zap_gfn_range() usage is quite rare and not a hot path,
e.g. MTRR and CR0.CD are almost guaranteed to be done only on vCPU0
during boot, and APICv inhibits are similarly infrequent operations.

Fixes: edb298c663fc ("KVM: x86/mmu: bump mmu notifier count in kvm_zap_gfn_range")
Reported-by: Chao Peng <chao.p.peng@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221111001841.2412598-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f81539061d6..1ccb769f62af 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_lock(&kvm->mmu_lock);
- kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_begin(kvm, 0, -1ul);
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
@@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
gfn_end - gfn_start);
- kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_end(kvm, 0, -1ul);
write_unlock(&kvm->mmu_lock);
}
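To see why the 0 => -1ul pair blocks every fault, recall that the
page-fault side retries whenever the faulting hva falls inside the
range currently being invalidated.  Here is a hedged, standalone C
sketch of that interval check (names are simplified; this illustrates
the mechanism, not the actual kernel structures):

#include <stdbool.h>
#include <stdio.h>

struct invalidate_range {
	unsigned long start;	/* inclusive */
	unsigned long end;	/* exclusive */
	bool in_progress;
};

/* A fault must be retried if its hva overlaps the in-flight invalidation. */
static bool fault_must_retry(const struct invalidate_range *r,
			     unsigned long hva)
{
	return r->in_progress && hva >= r->start && hva < r->end;
}

int main(void)
{
	/* start = 0, end = -1ul: effectively every hva is "in range". */
	struct invalidate_range r = {
		.start = 0, .end = -1ul, .in_progress = true,
	};

	printf("hva 0x1000: retry=%d\n", fault_must_retry(&r, 0x1000ul));
	printf("hva 0x7fffdeadb000: retry=%d\n",
	       fault_must_retry(&r, 0x7fffdeadb000ul));
	return 0;
}

Passing a GFN range here would silently compare frames against bytes,
which is the bug the commit fixes by widening the range to all-ones.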
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.

Possible dependencies:
0ec8ce073944 ("dmaengine: idxd: Do not enable user type Work Queue without Shared Virtual Addressing")
403a2e236538 ("dmaengine: idxd: change MSIX allocation based on per wq activation")
23a50c803565 ("dmaengine: idxd: fix descriptor flushing locking")
ec0d64231615 ("dmaengine: idxd: embed irq_entry in idxd_wq struct")
56fc39f5a367 ("dmaengine: idxd: handle interrupt handle revoked event")
f6d442f7088c ("dmaengine: idxd: handle invalid interrupt handle descriptors")
bd5970a0d01f ("dmaengine: idxd: create locked version of idxd_quiesce() call")
46c6df1c958e ("dmaengine: idxd: add helper for per interrupt handle drain")
eb0cf33a91b4 ("dmaengine: idxd: move interrupt handle assignment")
8b67426e0558 ("dmaengine: idxd: int handle management refactoring")
5d78abb6fbc9 ("dmaengine: idxd: rework descriptor free path on failure")
a3e340c1574b ("dmaengine: idxd: fix resource leak on dmaengine driver disable")
e530a9f3db41 ("dmaengine: idxd: reconfig device after device reset command")
88d97ea82cbe ("dmaengine: idxd: add halt interrupt support")
85f604af9c83 ("dmaengine: idxd: move out percpu_ref_exit() to ensure it's outside submission")

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

From 0ec8ce07394442d722806fe61b901a5b2b17249d Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 14 Oct 2022 15:25:41 -0700
Subject: [PATCH] dmaengine: idxd: Do not enable user type Work Queue without
 Shared Virtual Addressing

When the idxd_user_drv driver is bound to a Work Queue (WQ) device
without an IOMMU, or with IOMMU passthrough without Shared Virtual
Addressing (SVA), the application gains direct access to physical
memory via the device by programming a physical address into a
submitted descriptor.  This allows direct userspace read and write
access to arbitrary physical memory, which is inconsistent with the
security goals of a good kernel API.

Unlike the vfio_pci driver, the IDXD char device driver does not provide
any way to pin user pages and translate the address from user VA to IOVA
or PA without IOMMU SVA.  The application therefore has no way to
instruct the device to perform DMA, which makes the char device unusable
for normal applications.

Since a user type WQ without SVA cannot be used for normal application
usage and presents the above security issue, bind the idxd_user_drv
driver and enable user type WQs only when SVA is enabled (i.e. user
PASID is enabled).

Fixes: 448c3de8ac83 ("dmaengine: idxd: create user driver for wq 'device'")
Cc: stable@vger.kernel.org
Suggested-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Link: https://lore.kernel.org/r/20221014222541.3912195-1-fenghua.yu@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index c2808fd081d6..a9b96b18772f 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -312,6 +312,24 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
if (idxd->state != IDXD_DEV_ENABLED)
return -ENXIO;
+ /*
+ * User type WQ is enabled only when SVA is enabled for two reasons:
+ * - If no IOMMU or IOMMU Passthrough without SVA, userspace
+ * can directly access physical address through the WQ.
+ * - The IDXD cdev driver does not provide any ways to pin
+ * user pages and translate the address from user VA to IOVA or
+ * PA without IOMMU SVA. Therefore the application has no way
+ * to instruct the device to perform DMA function. This makes
+ * the cdev not usable for normal application usage.
+ */
+ if (!device_user_pasid_enabled(idxd)) {
+ idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU;
+ dev_dbg(&idxd->pdev->dev,
+ "User type WQ cannot be enabled without SVA.\n");
+
+ return -EOPNOTSUPP;
+ }
+
mutex_lock(&wq->wq_lock);
wq->type = IDXD_WQT_USER;
rc = drv_enable_wq(wq);
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index 095299c75828..2b9e7feba3f3 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -29,6 +29,7 @@ enum idxd_scmd_stat {
IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
+ IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000,
};
#define IDXD_SCMD_SOFTERR_MASK 0x80000000
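As a quick illustration of how the new error code surfaces, here is a
hedged C sketch of how a userspace tool might decode a WQ cmd_status
value after a failed enable.  The two constants come from the uapi hunk
above; the helper and the idea of having already read the status value
(e.g. from sysfs) are assumptions for the example, not part of the
driver's API:

#include <stdint.h>
#include <stdio.h>

#define IDXD_SCMD_SOFTERR_MASK		0x80000000u
#define IDXD_SCMD_WQ_USER_NO_IOMMU	0x80110000u

/* Hypothetical helper: interpret a cmd_status value obtained elsewhere. */
static void report_wq_status(uint32_t status)
{
	if (!(status & IDXD_SCMD_SOFTERR_MASK))
		return;
	if (status == IDXD_SCMD_WQ_USER_NO_IOMMU)
		fprintf(stderr, "user WQ rejected: SVA (user PASID) not enabled\n");
	else
		fprintf(stderr, "WQ enable soft error: 0x%x\n", status);
}

int main(void)
{
	report_wq_status(IDXD_SCMD_WQ_USER_NO_IOMMU);
	return 0;
}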
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.

Possible dependencies:
8625147cafaa ("hugetlbfs: don't delete error page from pagecache")
7e1813d48dd3 ("hugetlb: rename remove_huge_page to hugetlb_delete_from_page_cache")
1508062ecd55 ("hugetlbfs: Convert remove_inode_hugepages() to use filemap_get_folios()")
d9ef44de5d73 ("hugetlb: Convert huge_add_to_page_cache() to use a folio")
dd0f230a0a80 ("mm: hwpoison: refactor refcount check handling")
ea6d0630100b ("mm/hwpoison: do not lock page again when me_huge_page() successfully recovers")
171936ddaf97 ("mm/memory-failure: use a mutex to avoid memory_failure() races")
e32905e57358 ("userfaultfd: hugetlbfs: fix new flag usage in error path")
15b836536321 ("mm/hugetlb: remove unused variable pseudo_vma in remove_inode_hugepages()")
d4241a049ac0 ("mm/hugetlb: avoid calculating fault_mutex_hash in truncate_op case")
d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags")
585fc0d2871c ("mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page")
a8b2c2ce89d4 ("mm,hwpoison: take free pages off the buddy freelists")
5d1fd5dc877b ("mm,hwpoison: introduce MF_MSG_UNSPLIT_THP")
694bf0b0cdf9 ("mm,hwpoison: unify THP handling for hard and soft offline")
dd6e2402fad9 ("mm,hwpoison: kill put_hwpoison_page")
7e27f22c9e40 ("mm,hwpoison: unexport get_hwpoison_page and make it static")
bbe88753bd42 ("mm/hugetlb: make hugetlb migration callback CMA aware")
41b4dc14ee80 ("mm/gup: restrict CMA region by using allocation scope API")
19fc7bed252c ("mm/migrate: introduce a standard migration target allocation function")

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

From 8625147cafaa9ba74713d682f5185eb62cb2aedb Mon Sep 17 00:00:00 2001
From: James Houghton <jthoughton@google.com>
Date: Tue, 18 Oct 2022 20:01:25 +0000
Subject: [PATCH] hugetlbfs: don't delete error page from pagecache

This change is very similar to the change that was made for shmem [1],
and it solves the same problem but for HugeTLBFS instead.

Currently, when poison is found in a HugeTLB page, the page is removed
from the page cache.  That means that attempting to map or read that
hugepage in the future will result in a new hugepage being allocated
instead of notifying the user that the page was poisoned.  As [1]
states, this is effectively memory corruption.

The fix is to leave the page in the page cache.  If the user attempts
to use a poisoned HugeTLB page with a syscall, the syscall will fail
with EIO, the same error code that shmem uses.  For attempts to map the
page, the thread will get a BUS_MCEERR_AR SIGBUS.

[1]: commit a76054266661 ("mm: shmem: don't truncate page if memory failure happens")

Link: https://lkml.kernel.org/r/20221018200125.848471-1-jthoughton@google.com
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Tested-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dd54f67e47fd..df7772335dc0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -328,6 +328,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
} else {
unlock_page(page);
+ if (PageHWPoison(page)) {
+ put_page(page);
+ retval = -EIO;
+ break;
+ }
+
/*
* We have the page, copy it to user space buffer.
*/
@@ -1111,13 +1117,6 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct page *page)
{
- struct inode *inode = mapping->host;
- pgoff_t index = page->index;
-
- hugetlb_delete_from_page_cache(page);
- if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
- hugetlb_fix_reserve_counts(inode);
-
return 0;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 546df97c31e4..e48f8ef45b17 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6111,6 +6111,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
+ ret = -EIO;
+ if (PageHWPoison(page))
+ goto out_release_unlock;
+
/*
* We allow to overwrite a pte marker: consider when both MISSING|WP
* registered, we firstly wr-protect a none pte which has no page cache
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 145bb561ddb3..bead6bccc7f2 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1080,6 +1080,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
int res;
struct page *hpage = compound_head(p);
struct address_space *mapping;
+ bool extra_pins = false;
if (!PageHuge(hpage))
return MF_DELAYED;
@@ -1087,6 +1088,8 @@ static int me_huge_page(struct page_state *ps, struct page *p)
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, page_to_pfn(p), mapping);
+ /* The page is kept in page cache. */
+ extra_pins = true;
unlock_page(hpage);
} else {
unlock_page(hpage);
@@ -1104,7 +1107,7 @@ static int me_huge_page(struct page_state *ps, struct page *p)
}
}
- if (has_extra_refcount(ps, p, false))
+ if (has_extra_refcount(ps, p, extra_pins))
res = MF_FAILED;
return res;
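The userspace-visible effect of the hugetlbfs_read_iter() hunk can be
demonstrated with a short C sketch.  The mount point and file name are
assumptions, and the file's page must actually have been poisoned
beforehand (e.g. injected via madvise(MADV_HWPOISON) by a privileged
test harness):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	/* Hypothetical hugetlbfs file containing a poisoned page. */
	int fd = open("/mnt/huge/testfile", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* After the fix: reading the poisoned page fails with EIO
	 * instead of faulting in a freshly allocated hugepage. */
	if (read(fd, buf, sizeof(buf)) < 0 && errno == EIO)
		fprintf(stderr, "poisoned hugepage reported via EIO\n");
	close(fd);
	return 0;
}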
The same patch also fails to apply to the 4.14-stable tree, with the
same possible dependencies as listed above.  If someone wants it applied
there, or to any other stable or longterm tree, then please email the
backport, including the original git commit id, to
<stable@vger.kernel.org>.

thanks,

greg k-h
Likewise, the patch does not apply to the 4.19-stable tree, again with
the same possible dependencies; backports should be emailed with the
original git commit id to <stable@vger.kernel.org>.

thanks,

greg k-h
Finally, the patch does not apply to the 5.4-stable tree either, again
with the same possible dependencies; backports should be emailed with
the original git commit id to <stable@vger.kernel.org>.

thanks,

greg k-h