Restoration of the host IA32_SPEC_CTRL value is probably too late with respect to the return thunk training sequence.
With respect to the user/kernel boundary, AMD says, "If software chooses to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel exit), software should set STIBP to 1 before executing the return thunk training sequence." I assume the same requirements apply to the guest/host boundary. The return thunk training sequence is in vmenter.S, quite close to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's IA32_SPEC_CTRL value is not restored until much later.
To avoid this, move the restoration of host SPEC_CTRL to assembly and, for consistency, move the restoration of the guest SPEC_CTRL as well. This is not particularly difficult, apart from some care to cover both 32- and 64-bit, and to share code between SEV-ES and normal vmentry.
Cc: <stable@vger.kernel.org>
Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/cpu/bugs.c     |  13 +----
 arch/x86/kvm/kvm-asm-offsets.c |   1 +
 arch/x86/kvm/svm/svm.c         |  38 +++++-------
 arch/x86/kvm/svm/svm.h         |   4 +-
 arch/x86/kvm/svm/vmenter.S     | 102 ++++++++++++++++++++++++++++++++-
 5 files changed, 121 insertions(+), 37 deletions(-)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..6ec0b7ce7453 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -196,22 +196,15 @@ void __init check_bugs(void) }
/* - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is - * done in vmenter.S. + * NOTE: This function is *only* called for SVM, since Intel uses + * MSR_IA32_SPEC_CTRL for SSBD. */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + u64 guestval, hostval; struct thread_info *ti = current_thread_info();
- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - if (hostval != guestval) { - msrval = setguest ? guestval : hostval; - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } - /* * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index f83e88b85bf2..b2877c2c8df1 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -16,6 +16,7 @@ static void __used common(void) BLANK(); OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e15f6ea9e5cc..512bc06a4ba1 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -730,6 +730,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) u32 offset; u32 *msrpm;
+ /* + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: to_svm(vcpu)->msrpm;
@@ -3911,18 +3920,19 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; }
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) { - __svm_sev_es_vcpu_run(svm); + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); } else { struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
- __svm_vcpu_run(svm, __sme_page_pa(sd->save_area)); + __svm_vcpu_run(svm, __sme_page_pa(sd->save_area), + spec_ctrl_intercepted); }
guest_state_exit_irqoff(); @@ -3931,6 +3941,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
trace_kvm_entry(vcpu);
@@ -3989,26 +4000,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
- svm_vcpu_enter_exit(vcpu); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
if (!sev_es_guest(vcpu->kvm)) reload_tss(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 932f26be5675..bf9ff39dc420 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -683,7 +683,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); -void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa, bool spec_ctrl_intercepted);
#endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0a4272faf80f..a02eef724379 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,10 +32,70 @@
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +801: +.endm + +.macro RESTORE_HOST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +901: +.endm + +.macro RESTORE_SPEC_CTRL_BODY +800: + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. This is kept out-of-line so that the common + * case does not have to jump. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + movl SVM_spec_ctrl(%_ASM_DI), %eax + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax + je 801b + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr + jmp 801b + +900: + /* Same for after vmexit. */ + mov $MSR_IA32_SPEC_CTRL, %ecx + + /* + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, + * if it was not intercepted during guest execution. + */ + cmpb $0, (%_ASM_SP) + jnz 998f + rdmsr + movl %eax, SVM_spec_ctrl(%_ASM_DI) +998: + + /* Now restore the host value of the MSR if different from the guest's. */ + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax + cmp SVM_spec_ctrl(%_ASM_DI), %eax + je 901b + xor %edx, %edx + wrmsr + jmp 901b +.endm + + /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * * @hsave_pa: unsigned long + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -50,7 +110,12 @@ SYM_FUNC_START(__svm_vcpu_run) #endif push %_ASM_BX
- /* @hsave_pa is needed last after vmexit, save it first. */ + /* + * Both @spec_ctrl_intercepted and @hsave_pa are used only after vmexit. + * @spec_ctrl_intercepted is needed later and accessed directly from + * the stack in RESTORE_HOST_SPEC_CTRL, so save it first. + */ + push %_ASM_ARG3 push %_ASM_ARG2
/* Save @svm. */ @@ -61,6 +126,8 @@ SYM_FUNC_START(__svm_vcpu_run) mov %_ASM_ARG1, %_ASM_DI .endif
+ RESTORE_GUEST_SPEC_CTRL + /* * Use a single vmcb (vmcb01 because it's always valid) for * context switching guest state via VMLOAD/VMSAVE, that way @@ -138,6 +205,8 @@ SYM_FUNC_START(__svm_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif
+ RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -173,6 +242,9 @@ SYM_FUNC_START(__svm_vcpu_run) xor %r15d, %r15d #endif
+ /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX
#ifdef CONFIG_X86_64 @@ -187,6 +259,8 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET
+ RESTORE_SPEC_CTRL_BODY + 10: cmpb $0, kvm_rebooting jne 2b ud2 @@ -210,6 +284,7 @@ SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) push %_ASM_BP @@ -224,8 +299,21 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) #endif push %_ASM_BX
+ /* Save @spec_ctrl_intercepted for RESTORE_HOST_SPEC_CTRL. */ + push %_ASM_ARG2 + + /* Save @svm. */ + push %_ASM_ARG1 + +.ifnc _ASM_ARG1, _ASM_DI + /* Move @svm to RDI for RESTORE_GUEST_SPEC_CTRL. */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + RESTORE_GUEST_SPEC_CTRL + /* Get svm->current_vmcb->pa into RAX. */ - mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Enter guest mode */ @@ -235,11 +323,16 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
2: cli
+ /* Pop @svm to RDI, guest registers have been saved already. */ + pop %_ASM_DI + #ifdef CONFIG_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif
+ RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -249,6 +342,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET
+ /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX
#ifdef CONFIG_X86_64 @@ -263,6 +359,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %_ASM_BP RET
+ RESTORE_SPEC_CTRL_BODY + 3: cmpb $0, kvm_rebooting jne 2b ud2
On Tue, Nov 08, 2022, Paolo Bonzini wrote:
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0a4272faf80f..a02eef724379 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,10 +32,70 @@ .section .noinstr.text, "ax" +.macro RESTORE_GUEST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+801: +.endm
+.macro RESTORE_HOST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+901: +.endm
+.macro RESTORE_SPEC_CTRL_BODY
Can we split these into separate macros? It's a bit more typing, but it's not immediately obvious that these are two independent chunks (I was expecting a JMP from the 800 section into the 900 section).
E.g. RESTORE_GUEST_SPEC_CTRL_BODY and RESTORE_HOST_SPEC_CTRL_BODY
+800:
Ugh, the multiple users make it somewhat ugly, but rather than arbitrary numbers, what about using named labels to make it easier to understand the branches?
E.g.
--- arch/x86/kvm/svm/vmenter.S | 43 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index a02eef724379..23fd7353f0d0 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,24 +32,24 @@
.section .noinstr.text, "ax"
-.macro RESTORE_GUEST_SPEC_CTRL +.macro RESTORE_GUEST_SPEC_CTRL name /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ ALTERNATIVE_2 "", \ - "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ + "jmp .Lrestore_guest_spec_ctrl\name", X86_FEATURE_MSR_SPEC_CTRL, \ "", X86_FEATURE_V_SPEC_CTRL -801: +.Lpost_restore_guest_spec_ctrl\name: .endm
-.macro RESTORE_HOST_SPEC_CTRL +.macro RESTORE_HOST_SPEC_CTRL name /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ ALTERNATIVE_2 "", \ - "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ + "jmp .Lrestore_host_spec_ctrl\name", X86_FEATURE_MSR_SPEC_CTRL, \ "", X86_FEATURE_V_SPEC_CTRL -901: +.Lpost_restore_host_spec_ctrl\name: .endm
-.macro RESTORE_SPEC_CTRL_BODY -800: +.macro RESTORE_GUEST_SPEC_CTRL_BODY name +.Lrestore_guest_spec_ctrl\name: /* * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the * host's, write the MSR. This is kept out-of-line so that the common @@ -61,13 +61,16 @@ */ movl SVM_spec_ctrl(%_ASM_DI), %eax cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax - je 801b + je .Lpost_restore_guest_spec_ctrl\name + mov $MSR_IA32_SPEC_CTRL, %ecx xor %edx, %edx wrmsr - jmp 801b + jmp .Lpost_restore_guest_spec_ctrl\name +.endm
-900: +.macro RESTORE_HOST_SPEC_CTRL_BODY name +.Lrestore_host_spec_ctrl\name: /* Same for after vmexit. */ mov $MSR_IA32_SPEC_CTRL, %ecx
@@ -76,18 +79,18 @@ * if it was not intercepted during guest execution. */ cmpb $0, (%_ASM_SP) - jnz 998f + jnz .Lskip_save_guest_spec_ctrl\name rdmsr movl %eax, SVM_spec_ctrl(%_ASM_DI) -998:
+.Lskip_save_guest_spec_ctrl\name: /* Now restore the host value of the MSR if different from the guest's. */ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax cmp SVM_spec_ctrl(%_ASM_DI), %eax - je 901b + je .Lpost_restore_host_spec_ctrl\name xor %edx, %edx wrmsr - jmp 901b + jmp .Lpost_restore_host_spec_ctrl\name .endm
@@ -259,7 +262,8 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET
- RESTORE_SPEC_CTRL_BODY + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY
10: cmpb $0, kvm_rebooting jne 2b @@ -310,7 +314,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov %_ASM_ARG1, %_ASM_DI .endif
- RESTORE_GUEST_SPEC_CTRL + RESTORE_GUEST_SPEC_CTRL sev_es
/* Get svm->current_vmcb->pa into RAX. */ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX @@ -331,7 +335,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif
- RESTORE_HOST_SPEC_CTRL + RESTORE_HOST_SPEC_CTRL sev_es
/* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be @@ -359,7 +363,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %_ASM_BP RET
- RESTORE_SPEC_CTRL_BODY + RESTORE_GUEST_SPEC_CTRL_BODY sev_es + RESTORE_HOST_SPEC_CTRL_BODY sev_es
3: cmpb $0, kvm_rebooting jne 2b
base-commit: 0b242ada175d97a556866c48c80310860f634579
On 11/9/22 02:14, Sean Christopherson wrote:
+.macro RESTORE_SPEC_CTRL_BODY
Can we split these into separate macros? It's a bit more typing, but it's not immediately obvious that these are two independent chunks (I was expecting a JMP from the 800 section into the 900 section).
E.g. RESTORE_GUEST_SPEC_CTRL_BODY and RESTORE_HOST_SPEC_CTRL_BODY
Sure, I had it like that in an earlier version. I didn't see much benefit but it is indeed a bit more readable if you order the macros like this:

.macro RESTORE_GUEST_SPEC_CTRL
.macro RESTORE_GUEST_SPEC_CTRL_BODY
.macro RESTORE_HOST_SPEC_CTRL
.macro RESTORE_HOST_SPEC_CTRL_BODY
+800:
Ugh, the multiple users make it somewhat ugly, but rather than arbitrary numbers, what about using named labels to make it easier to understand the branches?
I think it's okay if we separate the macros.
Paolo
linux-stable-mirror@lists.linaro.org