The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e649b3f0188f8fd34dd0dde8d43fd3312b902fb2 Mon Sep 17 00:00:00 2001
From: Eiichi Tsukata <eiichi.tsukata(a)nutanix.com>
Date: Sat, 6 Jun 2020 13:26:27 +0900
Subject: [PATCH] KVM: x86: Fix APIC page invalidation race
Commit b1394e745b94 ("KVM: x86: fix APIC page invalidation") tried
to fix inappropriate APIC page invalidation by re-introducing arch
specific kvm_arch_mmu_notifier_invalidate_range() and calling it from
kvm_mmu_notifier_invalidate_range_start. However, the patch left a
possible race where the VMCS APIC address cache is updated *before*
it is unmapped:
(Invalidator) kvm_mmu_notifier_invalidate_range_start()
(Invalidator) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD)
(KVM VCPU) vcpu_enter_guest()
(KVM VCPU) kvm_vcpu_reload_apic_access_page()
(Invalidator) actually unmap page
Because of the above race, there can be a mismatch between the
host physical address stored in the APIC_ACCESS_PAGE VMCS field and
the host physical address stored in the EPT entry for the APIC GPA
(0xfee00000). When this happens, the processor will not trap APIC
accesses, and will instead show the raw contents of the APIC-access page.
Because Windows OS periodically checks for unexpected modifications to
the LAPIC register, this will show up as a BSOD crash with BugCheck
CRITICAL_STRUCTURE_CORRUPTION (109) we are currently seeing in
https://bugzilla.redhat.com/show_bug.cgi?id=1751017.
The root cause of the issue is that kvm_arch_mmu_notifier_invalidate_range()
cannot guarantee that no additional references are taken to the pages in
the range before kvm_mmu_notifier_invalidate_range_end(). Fortunately,
this case is supported by the MMU notifier API, as documented in
include/linux/mmu_notifier.h:
* If the subsystem
* can't guarantee that no additional references are taken to
* the pages in the range, it has to implement the
* invalidate_range() notifier to remove any references taken
* after invalidate_range_start().
The fix therefore is to reload the APIC-access page field in the VMCS
from kvm_mmu_notifier_invalidate_range() instead of ..._range_start().
Cc: stable(a)vger.kernel.org
Fixes: b1394e745b94 ("KVM: x86: fix APIC page invalidation")
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=197951
Signed-off-by: Eiichi Tsukata <eiichi.tsukata(a)nutanix.com>
Message-Id: <20200606042627.61070-1-eiichi.tsukata(a)nutanix.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c26dd1363151..24de847af52e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8270,9 +8270,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end,
- bool blockable)
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
{
unsigned long apic_address;
@@ -8283,8 +8282,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (start <= apic_address && apic_address < end)
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-
- return 0;
}
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d38d6b9c24be..e2f82131bb3e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1420,8 +1420,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end, bool blockable);
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4db151f6101e..7b6013f2ba19 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -155,10 +155,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end, bool blockable)
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
{
- return 0;
}
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
@@ -384,6 +383,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
return container_of(mn, struct kvm, mmu_notifier);
}
+static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
@@ -408,7 +419,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
- int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -425,14 +435,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
-
- ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start,
- range->end,
- mmu_notifier_range_blockable(range));
-
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return 0;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -538,6 +543,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+ .invalidate_range = kvm_mmu_notifier_invalidate_range,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6129ed877d409037b79866327102c9dc59a302fe Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson(a)intel.com>
Date: Wed, 27 May 2020 01:49:09 -0700
Subject: [PATCH] KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be
generated
Set the mmio_value to '0' instead of simply clearing the present bit to
squash a benign warning in kvm_mmu_set_mmio_spte_mask() that complains
about the mmio_value overlapping the lower GFN mask on systems with 52
bits of PA space.
Opportunistically clean up the code and comments.
Cc: stable(a)vger.kernel.org
Fixes: d43e2675e96fc ("KVM: x86: only do L1TF workaround on affected processors")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Message-Id: <20200527084909.23492-1-sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 86619631ff6a..92d056954194 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6143,25 +6143,16 @@ static void kvm_set_mmio_spte_mask(void)
u64 mask;
/*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
+ * Set a reserved PA bit in MMIO SPTEs to generate page faults with
+ * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT
+ * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
+ * 52-bit physical addresses then there are no reserved PA bits in the
+ * PTEs and so the reserved PA approach must be disabled.
*/
-
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (shadow_phys_bits == 52)
- mask &= ~1ull;
+ if (shadow_phys_bits < 52)
+ mask = BIT_ULL(51) | PT_PRESENT_MASK;
+ else
+ mask = 0;
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6129ed877d409037b79866327102c9dc59a302fe Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson(a)intel.com>
Date: Wed, 27 May 2020 01:49:09 -0700
Subject: [PATCH] KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be
generated
Set the mmio_value to '0' instead of simply clearing the present bit to
squash a benign warning in kvm_mmu_set_mmio_spte_mask() that complains
about the mmio_value overlapping the lower GFN mask on systems with 52
bits of PA space.
Opportunistically clean up the code and comments.
Cc: stable(a)vger.kernel.org
Fixes: d43e2675e96fc ("KVM: x86: only do L1TF workaround on affected processors")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Message-Id: <20200527084909.23492-1-sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 86619631ff6a..92d056954194 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6143,25 +6143,16 @@ static void kvm_set_mmio_spte_mask(void)
u64 mask;
/*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
+ * Set a reserved PA bit in MMIO SPTEs to generate page faults with
+ * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT
+ * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
+ * 52-bit physical addresses then there are no reserved PA bits in the
+ * PTEs and so the reserved PA approach must be disabled.
*/
-
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (shadow_phys_bits == 52)
- mask &= ~1ull;
+ if (shadow_phys_bits < 52)
+ mask = BIT_ULL(51) | PT_PRESENT_MASK;
+ else
+ mask = 0;
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0abcc8f65cc23b65bc8d1614cc64b02b1641ed7c Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk(a)redhat.com>
Date: Sat, 23 May 2020 19:14:54 +0300
Subject: [PATCH] KVM: VMX: enable X86_FEATURE_WAITPKG in KVM capabilities
Even though we might not allow the guest to use WAITPKG's new
instructions, we should tell KVM that the feature is supported by the
host CPU.
Note that vmx_waitpkg_supported checks that WAITPKG _can_ be set in
secondary execution controls as specified by VMX capability MSR, rather
than whether we actually enable it for a guest.
Cc: stable(a)vger.kernel.org
Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions")
Suggested-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Message-Id: <20200523161455.3940-2-mlevitsk(a)redhat.com>
Reviewed-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 89c766fad889..9b63ac8d97ee 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7138,6 +7138,9 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0x80000001 */
if (!cpu_has_vmx_rdtscp())
kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+
+ if (vmx_waitpkg_supported())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0abcc8f65cc23b65bc8d1614cc64b02b1641ed7c Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk(a)redhat.com>
Date: Sat, 23 May 2020 19:14:54 +0300
Subject: [PATCH] KVM: VMX: enable X86_FEATURE_WAITPKG in KVM capabilities
Even though we might not allow the guest to use WAITPKG's new
instructions, we should tell KVM that the feature is supported by the
host CPU.
Note that vmx_waitpkg_supported checks that WAITPKG _can_ be set in
secondary execution controls as specified by VMX capability MSR, rather
than whether we actually enable it for a guest.
Cc: stable(a)vger.kernel.org
Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions")
Suggested-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Message-Id: <20200523161455.3940-2-mlevitsk(a)redhat.com>
Reviewed-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 89c766fad889..9b63ac8d97ee 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7138,6 +7138,9 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0x80000001 */
if (!cpu_has_vmx_rdtscp())
kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+
+ if (vmx_waitpkg_supported())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d8df8cbb9156b0a0ab3f802b80cb5db57acc0bf Mon Sep 17 00:00:00 2001
From: Anthony Steinhauser <asteinhauser(a)google.com>
Date: Sun, 7 Jun 2020 05:44:19 -0700
Subject: [PATCH] x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for
indirect branches.
Currently, it is possible to enable indirect branch speculation even after
it was force-disabled using the PR_SPEC_FORCE_DISABLE option. Moreover, the
PR_GET_SPECULATION_CTRL command gives afterwards an incorrect result
(force-disabled when it is in fact enabled). This also is inconsistent
vs. STIBP and the documentation which clearly states that
PR_SPEC_FORCE_DISABLE cannot be undone.
Fix this by actually enforcing force-disabled indirect branch
speculation. PR_SPEC_ENABLE called after PR_SPEC_FORCE_DISABLE now fails
with -EPERM as described in the documentation.
Fixes: 9137bb27e60e ("x86/speculation: Add prctl() control for indirect branch speculation")
Signed-off-by: Anthony Steinhauser <asteinhauser(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8d57562b1d2c..56f573aa764f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1175,11 +1175,14 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
/*
* Indirect branch speculation is always disabled in strict
- * mode.
+ * mode. It can neither be enabled if it was force-disabled
+ * by a previous prctl call.
+
*/
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d8df8cbb9156b0a0ab3f802b80cb5db57acc0bf Mon Sep 17 00:00:00 2001
From: Anthony Steinhauser <asteinhauser(a)google.com>
Date: Sun, 7 Jun 2020 05:44:19 -0700
Subject: [PATCH] x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for
indirect branches.
Currently, it is possible to enable indirect branch speculation even after
it was force-disabled using the PR_SPEC_FORCE_DISABLE option. Moreover, the
PR_GET_SPECULATION_CTRL command gives afterwards an incorrect result
(force-disabled when it is in fact enabled). This also is inconsistent
vs. STIBP and the documentation which clearly states that
PR_SPEC_FORCE_DISABLE cannot be undone.
Fix this by actually enforcing force-disabled indirect branch
speculation. PR_SPEC_ENABLE called after PR_SPEC_FORCE_DISABLE now fails
with -EPERM as described in the documentation.
Fixes: 9137bb27e60e ("x86/speculation: Add prctl() control for indirect branch speculation")
Signed-off-by: Anthony Steinhauser <asteinhauser(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8d57562b1d2c..56f573aa764f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1175,11 +1175,14 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
/*
* Indirect branch speculation is always disabled in strict
- * mode.
+ * mode. It can neither be enabled if it was force-disabled
+ * by a previous prctl call.
+
*/
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4d8df8cbb9156b0a0ab3f802b80cb5db57acc0bf Mon Sep 17 00:00:00 2001
From: Anthony Steinhauser <asteinhauser(a)google.com>
Date: Sun, 7 Jun 2020 05:44:19 -0700
Subject: [PATCH] x86/speculation: PR_SPEC_FORCE_DISABLE enforcement for
indirect branches.
Currently, it is possible to enable indirect branch speculation even after
it was force-disabled using the PR_SPEC_FORCE_DISABLE option. Moreover, the
PR_GET_SPECULATION_CTRL command gives afterwards an incorrect result
(force-disabled when it is in fact enabled). This also is inconsistent
vs. STIBP and the documentation which clearly states that
PR_SPEC_FORCE_DISABLE cannot be undone.
Fix this by actually enforcing force-disabled indirect branch
speculation. PR_SPEC_ENABLE called after PR_SPEC_FORCE_DISABLE now fails
with -EPERM as described in the documentation.
Fixes: 9137bb27e60e ("x86/speculation: Add prctl() control for indirect branch speculation")
Signed-off-by: Anthony Steinhauser <asteinhauser(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8d57562b1d2c..56f573aa764f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1175,11 +1175,14 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
/*
* Indirect branch speculation is always disabled in strict
- * mode.
+ * mode. It can neither be enabled if it was force-disabled
+ * by a previous prctl call.
+
*/
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 21998a351512eba4ed5969006f0c55882d995ada Mon Sep 17 00:00:00 2001
From: Anthony Steinhauser <asteinhauser(a)google.com>
Date: Tue, 19 May 2020 06:40:42 -0700
Subject: [PATCH] x86/speculation: Avoid force-disabling IBPB based on STIBP
and enhanced IBRS.
When STIBP is unavailable or enhanced IBRS is available, Linux
force-disables the IBPB mitigation of Spectre-BTB even when simultaneous
multithreading is disabled. While attempts to enable IBPB using
prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, ...) fail with
EPERM, the seccomp syscall (or its prctl(PR_SET_SECCOMP, ...) equivalent)
which are used e.g. by Chromium or OpenSSH succeed with no errors but the
application remains silently vulnerable to cross-process Spectre v2 attacks
(classical BTB poisoning). At the same time the SYSFS reporting
(/sys/devices/system/cpu/vulnerabilities/spectre_v2) displays that IBPB is
conditionally enabled when in fact it is unconditionally disabled.
STIBP is useful only when SMT is enabled. When SMT is disabled and STIBP is
unavailable, it makes no sense to force-disable also IBPB, because IBPB
protects against cross-process Spectre-BTB attacks regardless of the SMT
state. At the same time since missing STIBP was only observed on AMD CPUs,
AMD does not recommend using STIBP, but recommends using IBPB, so disabling
IBPB because of missing STIBP goes directly against AMD's advice:
https://developer.amd.com/wp-content/resources/Architecture_Guidelines_Upda…
Similarly, enhanced IBRS is designed to protect against cross-core BTB poisoning
and BTB-poisoning attacks from user space against kernel (and
BTB-poisoning attacks from guest against hypervisor), it is not designed
to prevent cross-process (or cross-VM) BTB poisoning between processes (or
VMs) running on the same core. Therefore, even with enhanced IBRS it is
necessary to flush the BTB during context-switches, so there is no reason
to force disable IBPB when enhanced IBRS is available.
Enable the prctl control of IBPB even when STIBP is unavailable or enhanced
IBRS is available.
Fixes: 7cc765a67d8e ("x86/speculation: Enable prctl mode for spectre_v2_user")
Signed-off-by: Anthony Steinhauser <asteinhauser(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b21c39..8d57562b1d2c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -495,7 +495,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
-static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
SPECTRE_V2_USER_NONE;
#ifdef CONFIG_RETPOLINE
@@ -641,15 +643,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
break;
}
- /*
- * At this point, an STIBP mode other than "off" has been set.
- * If STIBP support is not being forced, check if STIBP always-on
- * is preferred.
- */
- if (mode != SPECTRE_V2_USER_STRICT &&
- boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
- mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -672,23 +665,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
+
+ spectre_v2_user_ibpb = mode;
}
- /* If enhanced IBRS is enabled no STIBP required */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ /*
+ * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+ * required.
+ */
+ if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
- * If SMT is not possible or STIBP is not available clear the STIBP
- * mode.
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
+ */
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+ /*
+ * If STIBP is not available, clear the STIBP mode.
*/
- if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ if (!boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
+
+ spectre_v2_user_stibp = mode;
+
set_mode:
- spectre_v2_user = mode;
- /* Only print the STIBP mode when SMT possible */
- if (smt_possible)
- pr_info("%s\n", spectre_v2_user_strings[mode]);
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
@@ -921,7 +927,7 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
@@ -1164,14 +1170,16 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
/*
* Indirect branch speculation is always disabled in strict
* mode.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
@@ -1182,10 +1190,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1216,7 +1226,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
- if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -1247,22 +1258,24 @@ static int ib_prctl_get(struct task_struct *task)
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
- switch (spectre_v2_user) {
- case SPECTRE_V2_USER_NONE:
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_STRICT:
- case SPECTRE_V2_USER_STRICT_PREFERRED:
- return PR_SPEC_DISABLE;
- default:
+ } else
return PR_SPEC_NOT_AFFECTED;
- }
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1501,7 +1514,7 @@ static char *stibp_state(void)
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 21998a351512eba4ed5969006f0c55882d995ada Mon Sep 17 00:00:00 2001
From: Anthony Steinhauser <asteinhauser(a)google.com>
Date: Tue, 19 May 2020 06:40:42 -0700
Subject: [PATCH] x86/speculation: Avoid force-disabling IBPB based on STIBP
and enhanced IBRS.
When STIBP is unavailable or enhanced IBRS is available, Linux
force-disables the IBPB mitigation of Spectre-BTB even when simultaneous
multithreading is disabled. While attempts to enable IBPB using
prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, ...) fail with
EPERM, the seccomp syscall (or its prctl(PR_SET_SECCOMP, ...) equivalent)
which are used e.g. by Chromium or OpenSSH succeed with no errors but the
application remains silently vulnerable to cross-process Spectre v2 attacks
(classical BTB poisoning). At the same time the SYSFS reporting
(/sys/devices/system/cpu/vulnerabilities/spectre_v2) displays that IBPB is
conditionally enabled when in fact it is unconditionally disabled.
STIBP is useful only when SMT is enabled. When SMT is disabled and STIBP is
unavailable, it makes no sense to force-disable also IBPB, because IBPB
protects against cross-process Spectre-BTB attacks regardless of the SMT
state. At the same time since missing STIBP was only observed on AMD CPUs,
AMD does not recommend using STIBP, but recommends using IBPB, so disabling
IBPB because of missing STIBP goes directly against AMD's advice:
https://developer.amd.com/wp-content/resources/Architecture_Guidelines_Upda…
Similarly, enhanced IBRS is designed to protect against cross-core BTB poisoning
and BTB-poisoning attacks from user space against kernel (and
BTB-poisoning attacks from guest against hypervisor), it is not designed
to prevent cross-process (or cross-VM) BTB poisoning between processes (or
VMs) running on the same core. Therefore, even with enhanced IBRS it is
necessary to flush the BTB during context-switches, so there is no reason
to force disable IBPB when enhanced IBRS is available.
Enable the prctl control of IBPB even when STIBP is unavailable or enhanced
IBRS is available.
Fixes: 7cc765a67d8e ("x86/speculation: Enable prctl mode for spectre_v2_user")
Signed-off-by: Anthony Steinhauser <asteinhauser(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b21c39..8d57562b1d2c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -495,7 +495,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
-static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
SPECTRE_V2_USER_NONE;
#ifdef CONFIG_RETPOLINE
@@ -641,15 +643,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
break;
}
- /*
- * At this point, an STIBP mode other than "off" has been set.
- * If STIBP support is not being forced, check if STIBP always-on
- * is preferred.
- */
- if (mode != SPECTRE_V2_USER_STRICT &&
- boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
- mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -672,23 +665,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
+
+ spectre_v2_user_ibpb = mode;
}
- /* If enhanced IBRS is enabled no STIBP required */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ /*
+ * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+ * required.
+ */
+ if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
- * If SMT is not possible or STIBP is not available clear the STIBP
- * mode.
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
+ */
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+ /*
+ * If STIBP is not available, clear the STIBP mode.
*/
- if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ if (!boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
+
+ spectre_v2_user_stibp = mode;
+
set_mode:
- spectre_v2_user = mode;
- /* Only print the STIBP mode when SMT possible */
- if (smt_possible)
- pr_info("%s\n", spectre_v2_user_strings[mode]);
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
@@ -921,7 +927,7 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
@@ -1164,14 +1170,16 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
/*
* Indirect branch speculation is always disabled in strict
* mode.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
@@ -1182,10 +1190,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1216,7 +1226,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
- if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -1247,22 +1258,24 @@ static int ib_prctl_get(struct task_struct *task)
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
- switch (spectre_v2_user) {
- case SPECTRE_V2_USER_NONE:
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_STRICT:
- case SPECTRE_V2_USER_STRICT_PREFERRED:
- return PR_SPEC_DISABLE;
- default:
+ } else
return PR_SPEC_NOT_AFFECTED;
- }
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1501,7 +1514,7 @@ static char *stibp_state(void)
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT: