Three tiny patches fixing bugs and optimizing the IBRS code. These are all fairly obvious, but they escaped review. They should go in through the x86/pti tree and should apply to both 4.9 and 4.14 trees.
Thanks!
Paolo
Paolo Bonzini (3):
  KVM: x86: use native MSR ops for SPEC_CTRL
  KVM: nVMX: fix wrong condition for SPEC_CTRL and PRED_CMD MSRs
  KVM: VMX: mark RDMSR path as unlikely
 arch/x86/kvm/svm.c |  9 +++++----
 arch/x86/kvm/vmx.c | 15 ++++++++-------
 2 files changed, 13 insertions(+), 11 deletions(-)
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
---
 arch/x86/kvm/svm.c | 7 ++++---
 arch/x86/kvm/vmx.c | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..1598beeda11c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 #include <asm/virtext.h>
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	 * being speculatively taken.
	 */
	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

	asm volatile (
		"push %%" _ASM_BP "; \n\t"
@@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	 * save it.
	 */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	/* Eliminate branch target predictions from guest mode */
	vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 67b028d8e726..5caeb8dc5bda 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 #include "trace.h"
@@ -9453,7 +9454,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
	 * being speculatively taken.
	 */
	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

	vmx->__launched = vmx->loaded_vmcs->launched;
	asm(
@@ -9589,10 +9590,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
	 * save it.
	 */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	/* Eliminate branch target predictions from guest mode */
	vmexit_fill_RSB();
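[Editor's note] For context, a rough sketch of the distinction the patch relies on: with CONFIG_PARAVIRT enabled, a plain wrmsrl() goes through the paravirt machinery, i.e. an indirect call through a pv_ops function pointer (unless and until it is patched at boot), whereas native_wrmsrl() issues the WRMSR instruction directly. Simplified sketch below, not the verbatim kernel code (the real definition in arch/x86/include/asm/msr.h also carries tracepoint hooks and _safe variants):

static inline void native_wrmsrl(unsigned int msr, unsigned long long val)
{
	/* WRMSR takes the MSR index in ECX and the 64-bit value in EDX:EAX. */
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c" (msr), "a" ((unsigned int)val),
		       "d" ((unsigned int)(val >> 32))
		     : "memory");
}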
On Wed, Feb 21, 2018 at 1:41 PM, Paolo Bonzini pbonzini@redhat.com wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Seems like there ought to be a native_rdmsrl, but otherwise this looks fine.
Reviewed-by: Jim Mattson jmattson@google.com
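[Editor's note] There is no native_rdmsrl wrapper in the tree at this point; if one were added along the lines Jim suggests, it could presumably mirror the rdmsrl() calling convention while bypassing paravirt. Hypothetical sketch (name and form are assumptions, not taken from the kernel):

#define native_rdmsrl(msr, val)	((val) = native_read_msr(msr))

With such a helper, the open-coded "svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);" in the patch could instead read "native_rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);", matching the shape of the rdmsrl() call it replaces.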
On Wed, Feb 21, 2018 at 10:41:35PM +0100, Paolo Bonzini wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
But it gets replaced during patching. As in once the machine boots the assembler changes from:
callq *0xfffflbah
to wrmsr
? I don't think you need this patch.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
---
 arch/x86/kvm/svm.c | 7 ++++---
 arch/x86/kvm/vmx.c | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..1598beeda11c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 #include <asm/virtext.h>
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	 * being speculatively taken.
	 */
	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

	asm volatile (
		"push %%" _ASM_BP "; \n\t"
@@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	 * save it.
	 */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	/* Eliminate branch target predictions from guest mode */
	vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 67b028d8e726..5caeb8dc5bda 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 #include "trace.h"
@@ -9453,7 +9454,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
	 * being speculatively taken.
	 */
	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

	vmx->__launched = vmx->loaded_vmcs->launched;
	asm(
@@ -9589,10 +9590,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
	 * save it.
	 */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	/* Eliminate branch target predictions from guest mode */
	vmexit_fill_RSB();
--
1.8.3.1
On 22/02/2018 18:07, Konrad Rzeszutek Wilk wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
But it gets replaced during patching. As in once the machine boots the assembler changes from:
callq *0xfffflbah
to wrmsr
? I don't think you need this patch.
Why not be explicit? According to the spec, PRED_CMD and SPEC_CTRL should be passed down to the guest without interception so it's safe to do this. On the other hand, especially with nested virtualization, I don't think you can absolutely guarantee that the paravirt call will be patched to rdmsr/wrmsr.
Paolo
On Fri, Feb 23, 2018 at 10:37:49AM +0100, Paolo Bonzini wrote:
On 22/02/2018 18:07, Konrad Rzeszutek Wilk wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
But it gets replaced during patching. As in once the machine boots the assembler changes from:
callq *0xfffflbah
to wrmsr
? I don't think you need this patch.
Why not be explicit? According to the spec, PRED_CMD and SPEC_CTRL
Explicit is fine.
But I would recommend you change the commit message to say so, and perhaps remove 'It is also slower' - as that is incorrect.
should be passed down to the guest without interception so it's safe to do this. On the other hand, especially with nested virtualization, I don't think you can absolutely guarantee that the paravirt call will be patched to rdmsr/wrmsr.
<scratches his head> If it is detected to be Xen PV, then yes it will be a call to a function. But that won't help as Xen PV runs in ring 3, so it has a whole bunch of other issues.
If it detects it as a KVM or Xen HVM guest, it will patch it with the default - which is the normal MSR instructions. Ditto for Hyper-V.
But <shrugs> no biggie - explicit is fine; I'm just nagging that the commit message could use a bit of expansion.
Paolo
On 23/02/2018 18:22, Konrad Rzeszutek Wilk wrote:
On Fri, Feb 23, 2018 at 10:37:49AM +0100, Paolo Bonzini wrote:
On 22/02/2018 18:07, Konrad Rzeszutek Wilk wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
But it gets replaced during patching. As in once the machine boots the assembler changes from:
callq *0xfffflbah
to wrmsr
? I don't think you need this patch.
Why not be explicit? According to the spec, PRED_CMD and SPEC_CTRL
Explicit is fine.
But I would recommend you change the commit message to say so, and perhaps remove 'It is also slower' - as that is incorrect.
Actually it is faster---that's why I made the change in the first place, though later I noticed
If it is detected to be Xen PV, then yes it will be a call to a function. But that won't help as Xen PV runs in ring 3, so it has a whole bunch of other issues.
Ok, I wasn't sure about PVH (which runs in ring 0 afair).
Paolo
On Fri, Feb 23, 2018 at 06:35:30PM +0100, Paolo Bonzini wrote:
On 23/02/2018 18:22, Konrad Rzeszutek Wilk wrote:
On Fri, Feb 23, 2018 at 10:37:49AM +0100, Paolo Bonzini wrote:
On 22/02/2018 18:07, Konrad Rzeszutek Wilk wrote:
Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl on the vmentry path too.
But it gets replaced during patching. As in once the machine boots the assembler changes from:
callq *0xfffflbah
to wrmsr
? I don't think you need this patch.
Why not be explicit? According to the spec, PRED_CMD and SPEC_CTRL
Explicit is fine.
But I would recommend you change the commit message to say so, and perhaps remove 'It is also slower' - as that is incorrect.
Actually it is faster---that's why I made the change in the first place, though later I noticed
If it is detected to be Xen PV, then yes it will be a call to a function. But that won't help as Xen PV runs in ring 3, so it has a whole bunch of other issues.
Ok, I wasn't sure about PVH (which runs in ring 0 afair).
Right. PVH is HVM without any emulated devices or BIOSes or such.
In the context of the paravirt ops, Xen PVH == Xen HVM.
Xen PV (and lguests) are the only ones that patch the callq *0xffffblah
to callq 0xffff800
While everyone else does the wrmsr.
Paolo
We need to change the default all-1s bitmap if the MSRs are _not_ intercepted. However, the code was disabling the intercept when it was _enabled_ in the VMCS01. This is not causing bigger trouble, because vmx_vcpu_run checks the VMCS02's MSR bitmap and would do the right thing even if fed garbage... but it's obviously a bug and it can cause extra MSR reads and writes when running nested guests.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Fixes: 15d45071523d89b3fb7372e2135fbd72f6af9506
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
---
 arch/x86/kvm/vmx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5caeb8dc5bda..af89d377681d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10235,7 +10235,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
		return false;

	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
-	    !pred_cmd && !spec_ctrl)
+	    pred_cmd && spec_ctrl)
		return false;

	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -10278,13 +10278,13 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
					MSR_TYPE_W);
	}

-	if (spec_ctrl)
+	if (!spec_ctrl)
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_SPEC_CTRL,
					MSR_TYPE_R | MSR_TYPE_W);

-	if (pred_cmd)
+	if (!pred_cmd)
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_PRED_CMD,
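[Editor's note] To spell out the inversion: as the code stood, spec_ctrl and pred_cmd are true when vmcs01 *does* intercept the corresponding MSR write, so the pass-through bit in the merged vmcs02 bitmap should only be cleared when they are false. A sketch of the intent (the variable meaning is inferred from the commit message, and msr_write_intercepted_l01() is assumed here to be the vmcs01-bitmap test used elsewhere in vmx.c):

	/* true when the vmcs01 bitmap still intercepts SPEC_CTRL writes */
	bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);

	if (!spec_ctrl)		/* L0 already passes the MSR through to L1, */
		nested_vmx_disable_intercept_for_msr(	/* so vmcs02 may too */
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_SPEC_CTRL,
					MSR_TYPE_R | MSR_TYPE_W);

As the replies below note, commit 206587a9fb76 already fixed the same inversion in a different way, so stacking this patch on top would flip the condition back - which is why it was dropped from v2.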
On Wed, Feb 21, 2018 at 1:41 PM, Paolo Bonzini pbonzini@redhat.com wrote:
We need to change the default all-1s bitmap if the MSRs are _not_ intercepted. However, the code was disabling the intercept when it was _enabled_ in the VMCS01. This is not causing bigger trouble, because vmx_vcpu_run checks the VMCS02's MSR bitmap and would do the right thing even if fed garbage... but it's obviously a bug and it can cause extra MSR reads and writes when running nested guests.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Fixes: 15d45071523d89b3fb7372e2135fbd72f6af9506
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Wasn't this already fixed by 206587a9fb76 ("X86/nVMX: Properly set spec_ctrl and pred_cmd before merging MSRs")?
On 22/02/2018 01:07, Jim Mattson wrote:
On Wed, Feb 21, 2018 at 1:41 PM, Paolo Bonzini pbonzini@redhat.com wrote:
We need to change the default all-1s bitmap if the MSRs are _not_ intercepted. However, the code was disabling the intercept when it was _enabled_ in the VMCS01. This is not causing bigger trouble, because vmx_vcpu_run checks the VMCS02's MSR bitmap and would do the right thing even if fed garbage... but it's obviously a bug and it can cause extra MSR reads and writes when running nested guests.
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Fixes: 15d45071523d89b3fb7372e2135fbd72f6af9506
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Wasn't this already fixed by 206587a9fb76 ("X86/nVMX: Properly set spec_ctrl and pred_cmd before merging MSRs")?
Ouch, yes, and my patch would have no conflicts at all so it would reintroduce the bug! Will resend v2 without it.
Paolo
vmx_vcpu_run and svm_vcpu_run are large functions, and marking the RDMSR path as unlikely can actually make a substantial cycle difference by keeping the fast path contiguous in memory. Without it, the retpoline guest/retpoline host case is about 50 cycles slower.
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
---
 arch/x86/kvm/svm.c | 2 +-
 arch/x86/kvm/vmx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1598beeda11c..24c9521ebc24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5465,7 +5465,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
	 * save it.
	 */
-	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (svm->spec_ctrl)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index af89d377681d..e13fd2a833c4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9589,7 +9589,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
	 * save it.
	 */
-	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);

	if (vmx->spec_ctrl)
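[Editor's note] For reference, unlikely() is the kernel's branch-prediction hint; aside from the branch-profiling variants, include/linux/compiler.h defines it in terms of GCC's __builtin_expect, which lets the compiler move the annotated (rarely taken) path out of the straight-line code:

# define likely(x)	__builtin_expect(!!(x), 1)
# define unlikely(x)	__builtin_expect(!!(x), 0)

With retpolines on both host and guest, keeping the common vmexit path contiguous is where the roughly 50 cycles quoted above come back.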
On Wed, Feb 21, 2018 at 1:41 PM, Paolo Bonzini pbonzini@redhat.com wrote:
vmx_vcpu_run and svm_vcpu_run are large functions, and this can actually make a substantial cycle difference by keeping the fast path contiguous in memory. Without it, the retpoline guest/retpoline host case is about 50 cycles slower.
Cc: x86@kernel.org
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: KarimAllah Ahmed karahmed@amazon.de
Cc: David Woodhouse dwmw@amazon.co.uk
Cc: Jim Mattson jmattson@google.com
Cc: Thomas Gleixner tglx@linutronix.de
Cc: Ingo Molnar mingo@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Reviewed-by: Jim Mattson jmattson@google.com