August 2025 - Linux-stable-mirror

FAILED: patch "[PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081236-platonic-plant-3b8f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:10 -0700 Subject: [PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f7ee2dbf10e..5e0415a8ee3f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,6 +1677,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) enum kvm_x86_run_flags { KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), }; struct kvm_x86_ops { @@ -1707,6 +1708,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index c85cbce6d2f6..4a6d4460f947 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vt_op(vcpu_load), .vcpu_put = vt_op(vcpu_put), + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vt_op(update_exception_bitmap), .get_feature_msr = vmx_get_feature_msr, .get_msr = vt_op(get_msr), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ef20184b8b11..c69df3aba8d1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4861,6 +4861,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a77d325fe78b..0db1bd383302 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,6 +7377,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c20a4185d10a..076af78af151 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -419,12 +419,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; vmcs_write64(GUEST_IA32_DEBUGCTL, val); } static inline u64 vmx_guest_debugctl_read(void) { - return vmcs_read64(GUEST_IA32_DEBUGCTL); + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fe3549bbd93..179c2c550c8d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10779,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; - u64 run_flags; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -11051,7 +11051,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff();

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081235-galleria-reapply-8a83@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:10 -0700 Subject: [PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f7ee2dbf10e..5e0415a8ee3f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,6 +1677,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) enum kvm_x86_run_flags { KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), }; struct kvm_x86_ops { @@ -1707,6 +1708,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index c85cbce6d2f6..4a6d4460f947 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vt_op(vcpu_load), .vcpu_put = vt_op(vcpu_put), + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vt_op(update_exception_bitmap), .get_feature_msr = vmx_get_feature_msr, .get_msr = vt_op(get_msr), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ef20184b8b11..c69df3aba8d1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4861,6 +4861,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a77d325fe78b..0db1bd383302 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,6 +7377,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c20a4185d10a..076af78af151 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -419,12 +419,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; vmcs_write64(GUEST_IA32_DEBUGCTL, val); } static inline u64 vmx_guest_debugctl_read(void) { - return vmcs_read64(GUEST_IA32_DEBUGCTL); + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fe3549bbd93..179c2c550c8d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10779,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; - u64 run_flags; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -11051,7 +11051,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff();

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081234-labrador-unturned-2c21@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:10 -0700 Subject: [PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f7ee2dbf10e..5e0415a8ee3f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,6 +1677,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) enum kvm_x86_run_flags { KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), }; struct kvm_x86_ops { @@ -1707,6 +1708,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index c85cbce6d2f6..4a6d4460f947 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vt_op(vcpu_load), .vcpu_put = vt_op(vcpu_put), + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vt_op(update_exception_bitmap), .get_feature_msr = vmx_get_feature_msr, .get_msr = vt_op(get_msr), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ef20184b8b11..c69df3aba8d1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4861,6 +4861,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a77d325fe78b..0db1bd383302 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,6 +7377,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c20a4185d10a..076af78af151 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -419,12 +419,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; vmcs_write64(GUEST_IA32_DEBUGCTL, val); } static inline u64 vmx_guest_debugctl_read(void) { - return vmcs_read64(GUEST_IA32_DEBUGCTL); + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fe3549bbd93..179c2c550c8d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10779,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; - u64 run_flags; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -11051,7 +11051,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff();

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while" failed to apply to 6.12-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.12-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y git checkout FETCH_HEAD git cherry-pick -x 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081233-clang-reusable-6ef9@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:10 -0700 Subject: [PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f7ee2dbf10e..5e0415a8ee3f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,6 +1677,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) enum kvm_x86_run_flags { KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), }; struct kvm_x86_ops { @@ -1707,6 +1708,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index c85cbce6d2f6..4a6d4460f947 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vt_op(vcpu_load), .vcpu_put = vt_op(vcpu_put), + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vt_op(update_exception_bitmap), .get_feature_msr = vmx_get_feature_msr, .get_msr = vt_op(get_msr), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ef20184b8b11..c69df3aba8d1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4861,6 +4861,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a77d325fe78b..0db1bd383302 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,6 +7377,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c20a4185d10a..076af78af151 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -419,12 +419,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; vmcs_write64(GUEST_IA32_DEBUGCTL, val); } static inline u64 vmx_guest_debugctl_read(void) { - return vmcs_read64(GUEST_IA32_DEBUGCTL); + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fe3549bbd93..179c2c550c8d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10779,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; - u64 run_flags; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -11051,7 +11051,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff();

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while" failed to apply to 6.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y git checkout FETCH_HEAD git cherry-pick -x 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081232-language-safehouse-4f99@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6b1dd26544d045f6a79e8c73572c0c0db3ef3c1a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:10 -0700 Subject: [PATCH] KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f7ee2dbf10e..5e0415a8ee3f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1677,6 +1677,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) enum kvm_x86_run_flags { KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), }; struct kvm_x86_ops { @@ -1707,6 +1708,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index c85cbce6d2f6..4a6d4460f947 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vt_op(vcpu_load), .vcpu_put = vt_op(vcpu_put), + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vt_op(update_exception_bitmap), .get_feature_msr = vmx_get_feature_msr, .get_msr = vt_op(get_msr), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ef20184b8b11..c69df3aba8d1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4861,6 +4861,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a77d325fe78b..0db1bd383302 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,6 +7377,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (run_flags & KVM_RUN_LOAD_GUEST_DR6) set_debugreg(vcpu->arch.dr6, 6); + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c20a4185d10a..076af78af151 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -419,12 +419,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; vmcs_write64(GUEST_IA32_DEBUGCTL, val); } static inline u64 vmx_guest_debugctl_read(void) { - return vmcs_read64(GUEST_IA32_DEBUGCTL); + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fe3549bbd93..179c2c550c8d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10779,7 +10779,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; - u64 run_flags; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -11051,7 +11051,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff();

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 095686e6fcb4150f0a55b1a25987fad3d8af58d6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081226-sublease-shoptalk-bc18@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 095686e6fcb4150f0a55b1a25987fad3d8af58d6 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:08 -0700 Subject: [PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter Add a consistency check for L2's guest_ia32_debugctl, as KVM only supports a subset of hardware functionality, i.e. KVM can't rely on hardware to detect illegal/unsupported values. Failure to check the vmcs12 value would allow the guest to load any harware-supported value while running L2. Take care to exempt BTF and LBR from the validity check in order to match KVM's behavior for writes via WRMSR, but without clobbering vmcs12. Even if VM_EXIT_SAVE_DEBUG_CONTROLS is set in vmcs12, L1 can reasonably expect that vmcs12->guest_ia32_debugctl will not be modified if writes to the MSR are being intercepted. Arguably, KVM _should_ update vmcs12 if VM_EXIT_SAVE_DEBUG_CONTROLS is set *and* writes to MSR_IA32_DEBUGCTLMSR are not being intercepted by L1, but that would incur non-trivial complexity and wouldn't change the fact that KVM's handling of DEBUGCTL is blatantly broken. I.e. the extra complexity is not worth carrying. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-7-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7211c71d4241..1b8b0642fc2d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2663,7 +2663,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); @@ -3156,7 +3157,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && @@ -4608,6 +4610,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + /* + * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02. + * Writes to DEBUGCTL that aren't intercepted by L1 are immediately + * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into + * vmcs02 doesn't strictly track vmcs12. + */ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) vmcs12->guest_dr7 = vcpu->arch.dr7; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4f827a75d980..6a8b78e954cd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2174,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, return (unsigned long)data; } -static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) { u64 debugctl = 0; @@ -2193,8 +2193,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated return debugctl; } -static bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, - bool host_initiated) +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) { u64 invalid; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b5758c33c60f..392e66c7e5fe 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -414,6 +414,9 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 095686e6fcb4150f0a55b1a25987fad3d8af58d6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081225-unsettled-hardener-28aa@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 095686e6fcb4150f0a55b1a25987fad3d8af58d6 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:08 -0700 Subject: [PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter Add a consistency check for L2's guest_ia32_debugctl, as KVM only supports a subset of hardware functionality, i.e. KVM can't rely on hardware to detect illegal/unsupported values. Failure to check the vmcs12 value would allow the guest to load any harware-supported value while running L2. Take care to exempt BTF and LBR from the validity check in order to match KVM's behavior for writes via WRMSR, but without clobbering vmcs12. Even if VM_EXIT_SAVE_DEBUG_CONTROLS is set in vmcs12, L1 can reasonably expect that vmcs12->guest_ia32_debugctl will not be modified if writes to the MSR are being intercepted. Arguably, KVM _should_ update vmcs12 if VM_EXIT_SAVE_DEBUG_CONTROLS is set *and* writes to MSR_IA32_DEBUGCTLMSR are not being intercepted by L1, but that would incur non-trivial complexity and wouldn't change the fact that KVM's handling of DEBUGCTL is blatantly broken. I.e. the extra complexity is not worth carrying. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-7-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7211c71d4241..1b8b0642fc2d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2663,7 +2663,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); @@ -3156,7 +3157,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && @@ -4608,6 +4610,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + /* + * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02. + * Writes to DEBUGCTL that aren't intercepted by L1 are immediately + * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into + * vmcs02 doesn't strictly track vmcs12. + */ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) vmcs12->guest_dr7 = vcpu->arch.dr7; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4f827a75d980..6a8b78e954cd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2174,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, return (unsigned long)data; } -static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) { u64 debugctl = 0; @@ -2193,8 +2193,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated return debugctl; } -static bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, - bool host_initiated) +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) { u64 invalid; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b5758c33c60f..392e66c7e5fe 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -414,6 +414,9 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 095686e6fcb4150f0a55b1a25987fad3d8af58d6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081224-earlobe-tilt-6881@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 095686e6fcb4150f0a55b1a25987fad3d8af58d6 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:08 -0700 Subject: [PATCH] KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter Add a consistency check for L2's guest_ia32_debugctl, as KVM only supports a subset of hardware functionality, i.e. KVM can't rely on hardware to detect illegal/unsupported values. Failure to check the vmcs12 value would allow the guest to load any harware-supported value while running L2. Take care to exempt BTF and LBR from the validity check in order to match KVM's behavior for writes via WRMSR, but without clobbering vmcs12. Even if VM_EXIT_SAVE_DEBUG_CONTROLS is set in vmcs12, L1 can reasonably expect that vmcs12->guest_ia32_debugctl will not be modified if writes to the MSR are being intercepted. Arguably, KVM _should_ update vmcs12 if VM_EXIT_SAVE_DEBUG_CONTROLS is set *and* writes to MSR_IA32_DEBUGCTLMSR are not being intercepted by L1, but that would incur non-trivial complexity and wouldn't change the fact that KVM's handling of DEBUGCTL is blatantly broken. I.e. the extra complexity is not worth carrying. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> Co-developed-by: Sean Christopherson <seanjc(a)google.com> Link: https://lore.kernel.org/r/20250610232010.162191-7-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7211c71d4241..1b8b0642fc2d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2663,7 +2663,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); @@ -3156,7 +3157,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && @@ -4608,6 +4610,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + /* + * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02. + * Writes to DEBUGCTL that aren't intercepted by L1 are immediately + * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into + * vmcs02 doesn't strictly track vmcs12. + */ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) vmcs12->guest_dr7 = vcpu->arch.dr7; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4f827a75d980..6a8b78e954cd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2174,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, return (unsigned long)data; } -static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) { u64 debugctl = 0; @@ -2193,8 +2193,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated return debugctl; } -static bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, - bool host_initiated) +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) { u64 invalid; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b5758c33c60f..392e66c7e5fe 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -414,6 +414,9 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Wrap all accesses to IA32_DEBUGCTL with" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 7d0cce6cbe71af6e9c1831bff101a2b9c249c4a2 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081216-unruffled-starry-0b41@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 7d0cce6cbe71af6e9c1831bff101a2b9c249c4a2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:09 -0700 Subject: [PATCH] KVM: VMX: Wrap all accesses to IA32_DEBUGCTL with getter/setter APIs Introduce vmx_guest_debugctl_{read,write}() to handle all accesses to vmcs.GUEST_IA32_DEBUGCTL. This will allow stuffing FREEZE_IN_SMM into GUEST_IA32_DEBUGCTL based on the host setting without bleeding the state into the guest, and without needing to copy+paste the FREEZE_IN_SMM logic into every patch that accesses GUEST_IA32_DEBUGCTL. No functional change intended. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> [sean: massage changelog, make inline, use in all prepare_vmcs02() cases] Reviewed-by: Dapeng Mi <dapeng1.mi(a)linux.intel.com> Link: https://lore.kernel.org/r/20250610232010.162191-8-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1b8b0642fc2d..ef20184b8b11 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2663,11 +2663,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & - vmx_get_supported_debugctl(vcpu, false)); + vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); + vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl); } if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -3532,7 +3532,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) - vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read(); if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -4806,7 +4806,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); kvm_set_dr(vcpu, 7, 0x400); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + vmx_guest_debugctl_write(vcpu, 0); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index bbf4509f32d0..0b173602821b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -653,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) */ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) { - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); + u64 data = vmx_guest_debugctl_read(); if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { data &= ~DEBUGCTLMSR_LBR; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); } } @@ -730,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) if (!lbr_desc->event) { vmx_disable_lbr_msrs_passthrough(vcpu); - if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR) + if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR) goto warn; if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use)) goto warn; @@ -752,7 +752,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) { - if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) + if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)) intel_pmu_release_guest_lbr_event(vcpu); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6a8b78e954cd..a77d325fe78b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2149,7 +2149,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); + msr_info->data = vmx_guest_debugctl_read(); break; default: find_uret_msr: @@ -2283,7 +2283,8 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) VM_EXIT_SAVE_DEBUG_CONTROLS) get_vmcs12(vcpu)->guest_ia32_debugctl = data; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); + if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && (data & DEBUGCTLMSR_LBR)) intel_pmu_create_guest_lbr_event(vcpu); @@ -4798,7 +4799,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(GUEST_SYSENTER_CS, 0); vmcs_writel(GUEST_SYSENTER_ESP, 0); vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + vmx_guest_debugctl_write(&vmx->vcpu, 0); if (cpu_has_vmx_tpr_shadow()) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 392e66c7e5fe..c20a4185d10a 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -417,6 +417,16 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); +static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) +{ + vmcs_write64(GUEST_IA32_DEBUGCTL, val); +} + +static inline u64 vmx_guest_debugctl_read(void) +{ + return vmcs_read64(GUEST_IA32_DEBUGCTL); +} + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and

4 months, 2 weeks

1
0
0 0

FAILED: patch "[PATCH] KVM: VMX: Wrap all accesses to IA32_DEBUGCTL with" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 7d0cce6cbe71af6e9c1831bff101a2b9c249c4a2 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081216-ointment-common-3721@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 7d0cce6cbe71af6e9c1831bff101a2b9c249c4a2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky <mlevitsk(a)redhat.com> Date: Tue, 10 Jun 2025 16:20:09 -0700 Subject: [PATCH] KVM: VMX: Wrap all accesses to IA32_DEBUGCTL with getter/setter APIs Introduce vmx_guest_debugctl_{read,write}() to handle all accesses to vmcs.GUEST_IA32_DEBUGCTL. This will allow stuffing FREEZE_IN_SMM into GUEST_IA32_DEBUGCTL based on the host setting without bleeding the state into the guest, and without needing to copy+paste the FREEZE_IN_SMM logic into every patch that accesses GUEST_IA32_DEBUGCTL. No functional change intended. Cc: stable(a)vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com> [sean: massage changelog, make inline, use in all prepare_vmcs02() cases] Reviewed-by: Dapeng Mi <dapeng1.mi(a)linux.intel.com> Link: https://lore.kernel.org/r/20250610232010.162191-8-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1b8b0642fc2d..ef20184b8b11 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2663,11 +2663,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & - vmx_get_supported_debugctl(vcpu, false)); + vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); + vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl); } if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -3532,7 +3532,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) - vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read(); if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -4806,7 +4806,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); kvm_set_dr(vcpu, 7, 0x400); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + vmx_guest_debugctl_write(vcpu, 0); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index bbf4509f32d0..0b173602821b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -653,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) */ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) { - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); + u64 data = vmx_guest_debugctl_read(); if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { data &= ~DEBUGCTLMSR_LBR; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); } } @@ -730,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) if (!lbr_desc->event) { vmx_disable_lbr_msrs_passthrough(vcpu); - if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR) + if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR) goto warn; if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use)) goto warn; @@ -752,7 +752,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) { - if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) + if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)) intel_pmu_release_guest_lbr_event(vcpu); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6a8b78e954cd..a77d325fe78b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2149,7 +2149,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); + msr_info->data = vmx_guest_debugctl_read(); break; default: find_uret_msr: @@ -2283,7 +2283,8 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) VM_EXIT_SAVE_DEBUG_CONTROLS) get_vmcs12(vcpu)->guest_ia32_debugctl = data; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); + if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && (data & DEBUGCTLMSR_LBR)) intel_pmu_create_guest_lbr_event(vcpu); @@ -4798,7 +4799,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(GUEST_SYSENTER_CS, 0); vmcs_writel(GUEST_SYSENTER_ESP, 0); vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + vmx_guest_debugctl_write(&vmx->vcpu, 0); if (cpu_has_vmx_tpr_shadow()) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 392e66c7e5fe..c20a4185d10a 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -417,6 +417,16 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); +static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) +{ + vmcs_write64(GUEST_IA32_DEBUGCTL, val); +} + +static inline u64 vmx_guest_debugctl_read(void) +{ + return vmcs_read64(GUEST_IA32_DEBUGCTL); +} + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and

4 months, 2 weeks

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror August 2025