The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025040844-busload-dumpling-45ff@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve vannapurve@google.com Date: Fri, 28 Feb 2025 01:44:15 +0000 Subject: [PATCH] x86/tdx: Fix arch_safe_halt() execution for TDX VMs
Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts.
Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example:
kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt()
To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether.
As tested by Ryan Afranji:
"Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset.
Verified the major performance improvement with this patchset applied."
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve vannapurve@google.com Signed-off-by: Ingo Molnar mingo@kernel.org Reviewed-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Tested-by: Ryan Afranji afranji@google.com Cc: Andy Lutomirski luto@kernel.org Cc: Brian Gerst brgerst@gmail.com Cc: Juergen Gross jgross@suse.com Cc: H. Peter Anvin hpa@zytor.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 05b4eca156cf..f614c0522a0b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -878,6 +878,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 7772b01ab738..aa0eb4057226 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <asm/paravirt_types.h> #include <asm/pgtable.h> #include <asm/set_memory.h> #include <asm/traps.h> @@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); }
-void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false;
@@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); }
+static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_module_args args = { @@ -1109,6 +1120,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+ /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 65394aa9b49f..4a1922ec80cf 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void); +void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls); #else
static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 91f6ff618852..962c3ce39323 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -939,7 +939,7 @@ void __init select_idle_routine(void) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else { static_call_update(x86_idle, default_idle); }
Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts.
Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example:
kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt()
To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether.
As tested by Ryan Afranji:
"Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset.
Verified the major performance improvement with this patchset applied."
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve vannapurve@google.com Signed-off-by: Ingo Molnar mingo@kernel.org Reviewed-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Tested-by: Ryan Afranji afranji@google.com Cc: Andy Lutomirski luto@kernel.org Cc: Brian Gerst brgerst@gmail.com Cc: Juergen Gross jgross@suse.com Cc: H. Peter Anvin hpa@zytor.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com (cherry picked from commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4) --- arch/x86/Kconfig | 1 + arch/x86/coco/tdx/tdx.c | 26 +++++++++++++++++++++++++- arch/x86/include/asm/tdx.h | 4 ++-- arch/x86/kernel/process.c | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a06fab5016fd..be36ee4f6616 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -881,6 +881,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 2f67e196a2ea..98d0ee9600eb 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -13,6 +13,7 @@ #include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <asm/paravirt_types.h> #include <asm/pgtable.h> #include <asm/traps.h>
@@ -334,7 +335,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); }
-void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false;
@@ -345,6 +346,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); }
+static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { @@ -888,6 +899,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 603e6d1e9d4a..c632f09f0c97 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -46,7 +46,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void); +void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -55,7 +55,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); #else
static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5351f293f770..64128a501446 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -950,7 +950,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else static_call_update(x86_idle, default_idle); }
[ Sasha's backport helper bot ]
Hi,
Summary of potential issues: ⚠️ Found matching upstream commit but patch is missing proper reference to it
Found matching upstream commit: 9f98a4f4e7216dbe366010b4cdcab6b220f229c4
Status in newer kernel trees: 6.14.y | Present (different SHA1: 8defd0d8678b) 6.13.y | Present (different SHA1: f88759f8f742) 6.12.y | Present (different SHA1: 7aff5ffe2c87)
Note: The patch differs from the upstream commit: --- 1: 9f98a4f4e7216 ! 1: 882c95e29bbba x86/tdx: Fix arch_safe_halt() execution for TDX VMs @@ Commit message Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com + (cherry picked from commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4)
## arch/x86/Kconfig ## @@ arch/x86/Kconfig: config INTEL_TDX_GUEST @@ arch/x86/coco/tdx/tdx.c #include <asm/insn-eval.h> +#include <asm/paravirt_types.h> #include <asm/pgtable.h> - #include <asm/set_memory.h> #include <asm/traps.h> + @@ arch/x86/coco/tdx/tdx.c: static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); } @@ arch/x86/coco/tdx/tdx.c: void __cpuidle tdx_safe_halt(void) + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { - struct tdx_module_args args = { + struct tdx_hypercall_args args = { @@ arch/x86/coco/tdx/tdx.c: void __init tdx_early_init(void) - x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; - x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that @@ arch/x86/include/asm/tdx.h: void tdx_get_ve_info(struct ve_info *ve);
bool tdx_early_handle_ve(struct pt_regs *regs);
-@@ arch/x86/include/asm/tdx.h: void __init tdx_dump_td_ctls(u64 td_ctls); +@@ arch/x86/include/asm/tdx.h: int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); #else
static inline void tdx_early_init(void) { }; @@ arch/x86/include/asm/tdx.h: void __init tdx_dump_td_ctls(u64 td_ctls);
## arch/x86/kernel/process.c ## -@@ arch/x86/kernel/process.c: void __init select_idle_routine(void) +@@ arch/x86/kernel/process.c: void select_idle_routine(const struct cpuinfo_x86 *c) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); - } else { + } else static_call_update(x86_idle, default_idle); - } + } ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-6.6.y | Success | Success |
linux-stable-mirror@lists.linaro.org