The Hyper-V "EnlightenedNptTlb" enlightenment is always enabled when KVM is running on top of Hyper-V and Hyper-V exposes support for it (which is always). On AMD CPUs this enlightenment results in ASID invalidations not flushing TLB entries derived from the NPT. To force the underlying (L0) hypervisor to rebuild its shadow page tables, an explicit hypercall is needed.
The original KVM implementation of Hyper-V's "EnlightenedNptTlb" on SVM only added remote TLB flush hooks. This worked out fine for a while, as sufficient remote TLB flushes where being issued in KVM to mask the problem. Since v5.17, changes in the TDP code reduced the number of flushes and the out-of-sync TLB prevents guests from booting successfully.
Split svm_flush_tlb_current() into separate callbacks for the 3 cases (guest/all/current), and issue the required Hyper-V hypercall when a Hyper-V TLB flush is needed. The most important case where the TLB flush was missing is when loading a new PGD, which is followed by what is now svm_flush_tlb_current(). Since the hypercall acts on all CPUs, cache the last flushed root in kvm_arch->hv_root_tdp. This prevents the shadow NPTs from being unnecessarily rebuilt for multiple vcpus and when the same root is flushed multiple times in a row on a single vcpu.
Cc: stable@vger.kernel.org # v5.17+ Fixes: 1e0c7d40758b ("KVM: SVM: hyper-v: Remote TLB flush for SVM") Link: https://lore.kernel.org/lkml/43980946-7bbf-dcef-7e40-af904c456250@linux.micr... Suggested-by: Sean Christopherson seanjc@google.com Signed-off-by: Jeremi Piotrowski jpiotrowski@linux.microsoft.com --- arch/x86/kvm/kvm_onhyperv.c | 23 +++++++++++++++++++++++ arch/x86/kvm/kvm_onhyperv.h | 5 +++++ arch/x86/kvm/svm/svm.c | 18 +++++++++++++++--- 3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c index 482d6639ef88..036e04c0a161 100644 --- a/arch/x86/kvm/kvm_onhyperv.c +++ b/arch/x86/kvm/kvm_onhyperv.c @@ -94,6 +94,29 @@ int hv_remote_flush_tlb(struct kvm *kvm) } EXPORT_SYMBOL_GPL(hv_remote_flush_tlb);
+void hv_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + struct kvm_arch *kvm_arch = &vcpu->kvm->arch; + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; + + if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb && VALID_PAGE(root_tdp)) { + spin_lock(&kvm_arch->hv_root_tdp_lock); + if (kvm_arch->hv_root_tdp != root_tdp) { + hyperv_flush_guest_mapping(root_tdp); + kvm_arch->hv_root_tdp = root_tdp; + } + spin_unlock(&kvm_arch->hv_root_tdp_lock); + } +} +EXPORT_SYMBOL_GPL(hv_flush_tlb_current); + +void hv_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + if (WARN_ON_ONCE(kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb)) + hv_remote_flush_tlb(vcpu->kvm); +} +EXPORT_SYMBOL_GPL(hv_flush_tlb_all); + void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { struct kvm_arch *kvm_arch = &vcpu->kvm->arch; diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h index 287e98ef9df3..f24d0ca41d2b 100644 --- a/arch/x86/kvm/kvm_onhyperv.h +++ b/arch/x86/kvm/kvm_onhyperv.h @@ -10,11 +10,16 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm, struct kvm_tlb_range *range); int hv_remote_flush_tlb(struct kvm *kvm); +void hv_flush_tlb_current(struct kvm_vcpu *vcpu); +void hv_flush_tlb_all(struct kvm_vcpu *vcpu); void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp); #else /* !CONFIG_HYPERV */ static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { } + +static inline void hv_flush_tlb_current(struct kvm_vcpu *vcpu) { } +static inline void hv_flush_tlb_all(struct kvm_vcpu *vcpu) { } #endif /* !CONFIG_HYPERV */
#endif diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 252e7f37e4e2..8da6740ef595 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); }
-static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu);
@@ -3753,6 +3753,18 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation--; }
+static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + hv_flush_tlb_current(vcpu); + svm_flush_tlb_asid(vcpu); +} + +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + hv_flush_tlb_all(vcpu); + svm_flush_tlb_asid(vcpu); +} + static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4745,10 +4757,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_rflags = svm_set_rflags, .get_if_flag = svm_get_if_flag,
- .flush_tlb_all = svm_flush_tlb_current, + .flush_tlb_all = svm_flush_tlb_all, .flush_tlb_current = svm_flush_tlb_current, .flush_tlb_gva = svm_flush_tlb_gva, - .flush_tlb_guest = svm_flush_tlb_current, + .flush_tlb_guest = svm_flush_tlb_asid,
.vcpu_pre_run = svm_vcpu_pre_run, .vcpu_run = svm_vcpu_run,