The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1b3ab5ad1b8ad99bae76ec583809c5f5a31c707c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson(a)intel.com>
Date: Mon, 3 Dec 2018 13:52:51 -0800
Subject: [PATCH] KVM: nVMX: Free the VMREAD/VMWRITE bitmaps if
alloc_kvm_area() fails
Fixes: 34a1cd60d17f ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c379d0bfdcba..3ec47b7a94d6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8037,13 +8037,16 @@ static __init int hardware_setup(void)
kvm_mce_cap_supported |= MCG_LMCE_P;
- return alloc_kvm_area();
+ r = alloc_kvm_area();
+ if (r)
+ goto out;
+ return 0;
out:
for (i = 0; i < VMX_BITMAP_NR; i++)
free_page((unsigned long)vmx_bitmap[i]);
- return r;
+ return r;
}
static __exit void hardware_unsetup(void)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1b3ab5ad1b8ad99bae76ec583809c5f5a31c707c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson(a)intel.com>
Date: Mon, 3 Dec 2018 13:52:51 -0800
Subject: [PATCH] KVM: nVMX: Free the VMREAD/VMWRITE bitmaps if
alloc_kvm_area() fails
Fixes: 34a1cd60d17f ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c379d0bfdcba..3ec47b7a94d6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8037,13 +8037,16 @@ static __init int hardware_setup(void)
kvm_mce_cap_supported |= MCG_LMCE_P;
- return alloc_kvm_area();
+ r = alloc_kvm_area();
+ if (r)
+ goto out;
+ return 0;
out:
for (i = 0; i < VMX_BITMAP_NR; i++)
free_page((unsigned long)vmx_bitmap[i]);
- return r;
+ return r;
}
static __exit void hardware_unsetup(void)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1b3ab5ad1b8ad99bae76ec583809c5f5a31c707c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson(a)intel.com>
Date: Mon, 3 Dec 2018 13:52:51 -0800
Subject: [PATCH] KVM: nVMX: Free the VMREAD/VMWRITE bitmaps if
alloc_kvm_area() fails
Fixes: 34a1cd60d17f ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c379d0bfdcba..3ec47b7a94d6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8037,13 +8037,16 @@ static __init int hardware_setup(void)
kvm_mce_cap_supported |= MCG_LMCE_P;
- return alloc_kvm_area();
+ r = alloc_kvm_area();
+ if (r)
+ goto out;
+ return 0;
out:
for (i = 0; i < VMX_BITMAP_NR; i++)
free_page((unsigned long)vmx_bitmap[i]);
- return r;
+ return r;
}
static __exit void hardware_unsetup(void)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 234ff0b729ad882d20f7996591a964965647addf Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus(a)ozlabs.org>
Date: Fri, 16 Nov 2018 21:28:18 +1100
Subject: [PATCH] KVM: PPC: Book3S HV: Fix race between kvm_unmap_hva_range and
MMU mode switch
Testing has revealed an occasional crash which appears to be caused
by a race between kvmppc_switch_mmu_to_hpt and kvm_unmap_hva_range_hv.
The symptom is a NULL pointer dereference in __find_linux_pte() called
from kvm_unmap_radix() with kvm->arch.pgtable == NULL.
Looking at kvmppc_switch_mmu_to_hpt(), it does indeed clear
kvm->arch.pgtable (via kvmppc_free_radix()) before setting
kvm->arch.radix to NULL, and there is nothing to prevent
kvm_unmap_hva_range_hv() or the other MMU callback functions from
being called concurrently with kvmppc_switch_mmu_to_hpt() or
kvmppc_switch_mmu_to_radix().
This patch therefore adds calls to spin_lock/unlock on the kvm->mmu_lock
around the assignments to kvm->arch.radix, and makes sure that the
partition-scoped radix tree or HPT is only freed after changing
kvm->arch.radix.
This also takes the kvm->mmu_lock in kvmppc_rmap_reset() to make sure
that the clearing of each rmap array (one per memslot) doesn't happen
concurrently with use of the array in the kvm_unmap_hva_range_hv()
or the other MMU callbacks.
Fixes: 18c3640cefc7 ("KVM: PPC: Book3S HV: Add infrastructure for running HPT guests on radix host")
Cc: stable(a)vger.kernel.org # v4.15+
Signed-off-by: Paul Mackerras <paulus(a)ozlabs.org>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c615617e78ac..a18afda3d0f0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm)
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
/*
* This assumes it is acceptable to lose reference and
* change bits across a reset.
*/
memset(memslot->arch.rmap, 0,
memslot->npages * sizeof(*memslot->arch.rmap));
+ spin_unlock(&kvm->mmu_lock);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a56f8413758a..ab43306c4ea1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4532,12 +4532,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
+ kvmppc_rmap_reset(kvm);
+ kvm->arch.process_table = 0;
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 0;
+ spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
kvmppc_update_lpcr(kvm, LPCR_VPM1,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
- kvmppc_rmap_reset(kvm);
- kvm->arch.radix = 0;
- kvm->arch.process_table = 0;
return 0;
}
@@ -4549,12 +4552,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
err = kvmppc_init_vm_radix(kvm);
if (err)
return err;
-
+ kvmppc_rmap_reset(kvm);
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 1;
+ spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
- kvmppc_rmap_reset(kvm);
- kvm->arch.radix = 1;
return 0;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5b5e4d623ec8a34689df98e42d038a3b594d2ff9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko(a)suse.com>
Date: Tue, 13 Nov 2018 19:49:10 +0100
Subject: [PATCH] x86/speculation/l1tf: Drop the swap storage limit restriction
when l1tf=off
Swap storage is restricted to max_swapfile_size (~16TB on x86_64) whenever
the system is deemed affected by L1TF vulnerability. Even though the limit
is quite high for most deployments it seems to be too restrictive for
deployments which are willing to live with the mitigation disabled.
We have a customer to deploy 8x 6,4TB PCIe/NVMe SSD swap devices which is
clearly out of the limit.
Drop the swap restriction when l1tf=off is specified. It also doesn't make
much sense to warn about too much memory for the l1tf mitigation when it is
forcefully disabled by the administrator.
[ tglx: Folded the documentation delta change ]
Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Andi Kleen <ak(a)linux.intel.com>
Acked-by: Jiri Kosina <jkosina(a)suse.cz>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: <linux-mm(a)kvack.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20181113184910.26697-1-mhocko@kernel.org
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 05a252e5178d..835e422572eb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2095,6 +2095,9 @@
off
Disables hypervisor mitigations and doesn't
emit any warnings.
+ It also drops the swap size and available
+ RAM limit restriction on both hypervisor and
+ bare metal.
Default is 'flush'.
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst
index b85dd80510b0..9af977384168 100644
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/l1tf.rst
@@ -405,6 +405,9 @@ time with the option "l1tf=". The valid arguments for this option are:
off Disables hypervisor mitigations and doesn't emit any
warnings.
+ It also drops the swap size and available RAM limit restrictions
+ on both hypervisor and bare metal.
+
============ =============================================================
The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
@@ -576,7 +579,8 @@ Default mitigations
The kernel default mitigations for vulnerable processors are:
- PTE inversion to protect against malicious user space. This is done
- unconditionally and cannot be controlled.
+ unconditionally and cannot be controlled. The swap storage is limited
+ to ~16TB.
- L1D conditional flushing on VMENTER when EPT is enabled for
a guest.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a68b32cb845a..58689ac64440 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1002,7 +1002,8 @@ static void __init l1tf_select_mitigation(void)
#endif
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
- if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
+ if (l1tf_mitigation != L1TF_MITIGATION_OFF &&
+ e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
half_pa);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ef99f3892e1f..427a955a2cf2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -931,7 +931,7 @@ unsigned long max_swapfile_size(void)
pages = generic_max_swapfile_size();
- if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) {
/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
unsigned long long l1tf_limit = l1tf_pfn_limit();
/*
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5b5e4d623ec8a34689df98e42d038a3b594d2ff9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko(a)suse.com>
Date: Tue, 13 Nov 2018 19:49:10 +0100
Subject: [PATCH] x86/speculation/l1tf: Drop the swap storage limit restriction
when l1tf=off
Swap storage is restricted to max_swapfile_size (~16TB on x86_64) whenever
the system is deemed affected by L1TF vulnerability. Even though the limit
is quite high for most deployments it seems to be too restrictive for
deployments which are willing to live with the mitigation disabled.
We have a customer to deploy 8x 6,4TB PCIe/NVMe SSD swap devices which is
clearly out of the limit.
Drop the swap restriction when l1tf=off is specified. It also doesn't make
much sense to warn about too much memory for the l1tf mitigation when it is
forcefully disabled by the administrator.
[ tglx: Folded the documentation delta change ]
Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2")
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Andi Kleen <ak(a)linux.intel.com>
Acked-by: Jiri Kosina <jkosina(a)suse.cz>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: <linux-mm(a)kvack.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20181113184910.26697-1-mhocko@kernel.org
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 05a252e5178d..835e422572eb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2095,6 +2095,9 @@
off
Disables hypervisor mitigations and doesn't
emit any warnings.
+ It also drops the swap size and available
+ RAM limit restriction on both hypervisor and
+ bare metal.
Default is 'flush'.
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst
index b85dd80510b0..9af977384168 100644
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/l1tf.rst
@@ -405,6 +405,9 @@ time with the option "l1tf=". The valid arguments for this option are:
off Disables hypervisor mitigations and doesn't emit any
warnings.
+ It also drops the swap size and available RAM limit restrictions
+ on both hypervisor and bare metal.
+
============ =============================================================
The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
@@ -576,7 +579,8 @@ Default mitigations
The kernel default mitigations for vulnerable processors are:
- PTE inversion to protect against malicious user space. This is done
- unconditionally and cannot be controlled.
+ unconditionally and cannot be controlled. The swap storage is limited
+ to ~16TB.
- L1D conditional flushing on VMENTER when EPT is enabled for
a guest.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a68b32cb845a..58689ac64440 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1002,7 +1002,8 @@ static void __init l1tf_select_mitigation(void)
#endif
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
- if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
+ if (l1tf_mitigation != L1TF_MITIGATION_OFF &&
+ e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
half_pa);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ef99f3892e1f..427a955a2cf2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -931,7 +931,7 @@ unsigned long max_swapfile_size(void)
pages = generic_max_swapfile_size();
- if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+ if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) {
/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
unsigned long long l1tf_limit = l1tf_pfn_limit();
/*