On Mon, Nov 7, 2022 at 6:54 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
Restoration of the host IA32_SPEC_CTRL value is probably too late with respect to the return thunk training sequence.
With respect to the user/kernel boundary, AMD says, "If software chooses to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel exit), software should set STIBP to 1 before executing the return thunk training sequence." I assume the same requirements apply to the guest/host boundary. The return thunk training sequence is in vmenter.S, quite close to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's IA32_SPEC_CTRL value is not restored until much later.
To avoid this, move the restoration of host SPEC_CTRL to assembly and, for consistency, move the restoration of the guest SPEC_CTRL as well. This is not particularly difficult, apart from some care to cover both 32- and 64-bit, and to share code between SEV-ES and normal vmentry.
Cc: stable@vger.kernel.org
Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kernel/asm-offsets.c | 1 +
arch/x86/kernel/cpu/bugs.c | 13 ++---
arch/x86/kvm/svm/svm.c | 38 ++++++---------
arch/x86/kvm/svm/svm.h | 4 +-
arch/x86/kvm/svm/vmenter.S | 92 ++++++++++++++++++++++++++++++++++-
5 files changed, 111 insertions(+), 37 deletions(-)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 69d1fed51086..d0bd68af0a5a 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -115,6 +115,7 @@ static void __used common(void) OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); }
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..6ec0b7ce7453 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -196,22 +196,15 @@ void __init check_bugs(void) }
 /*
- * NOTE: This function is *only* called for SVM.  VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
  */
void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) {
u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
u64 guestval, hostval; struct thread_info *ti = current_thread_info();
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
}
}
/* * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 381c7dcffe25..31aa158a2e10 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -731,6 +731,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) u32 offset; u32 *msrpm;
/*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
* save it.
*
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: to_svm(vcpu)->msrpm;
@@ -3912,18 +3921,19 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; }
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff(); if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(svm);
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); } else { struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
__svm_vcpu_run(svm, __sme_page_pa(sd->save_area));
__svm_vcpu_run(svm, __sme_page_pa(sd->save_area),
spec_ctrl_intercepted); } guest_state_exit_irqoff();
@@ -3932,6 +3942,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu);
bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); trace_kvm_entry(vcpu);
@@ -3990,26 +4001,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
svm_vcpu_enter_exit(vcpu);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
* turn it off. This is much more efficient than blindly adding
* it to the atomic save/restore list. Especially as the former
* (Saving guest MSRs on vmexit) doesn't even exist in KVM.
*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
* save it.
*
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); if (!sev_es_guest(vcpu->kvm)) reload_tss(vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 99410651f2a5..9d940d8736f0 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -483,7 +483,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); -void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa, bool spec_ctrl_intercepted);
#endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 45a4bd002494..9e381386ffdc 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,10 +32,64 @@
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "jmp 999f", \
"", X86_FEATURE_MSR_SPEC_CTRL, \
"jmp 999f", X86_FEATURE_V_SPEC_CTRL
/*
* SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
* host's, write the MSR.
*
* IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
* there must not be any returns or indirect branches between this code
* and vmentry.
*/
movl SVM_spec_ctrl(%_ASM_DI), %eax
cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
je 999f
mov $MSR_IA32_SPEC_CTRL, %ecx
xor %edx, %edx
wrmsr
+999:
+.endm
+.macro RESTORE_HOST_SPEC_CTRL
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "jmp 999f", \
"", X86_FEATURE_MSR_SPEC_CTRL, \
"jmp 999f", X86_FEATURE_V_SPEC_CTRL
mov $MSR_IA32_SPEC_CTRL, %ecx
/*
* Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
* if it was not intercepted during guest execution.
*/
cmpb $0, (%_ASM_SP)
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
/* Now restore the host value of the MSR if different from the guest's. */
movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
cmp SVM_spec_ctrl(%_ASM_DI), %eax
je 999f
xor %edx, %edx
wrmsr
+999:
+.endm
It seems unfortunate to have the unconditional branches in the more common cases.