From: Len Brown len.brown@intel.com
Under some conditions, MONITOR wakeups on Lunar Lake processors can be lost, resulting in significant user-visible delays.
Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu() always sends an IPI, avoiding this potential delay.
Also update the X86_BUG_MONITOR workaround to handle the new smp_kick_mwait_play_dead() path.
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364
Cc: stable@vger.kernel.org # 6.11 Signed-off-by: Len Brown len.brown@intel.com --- arch/x86/kernel/cpu/intel.c | 3 ++- arch/x86/kernel/smpboot.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e7656cbef68d..aa63f5f780a0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) + if (boot_cpu_has(X86_FEATURE_MWAIT) && + (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M)) set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 766f092dab80..910cb2d72c13 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate); + /* If MONITOR unreliable, send IPI */ + if (boot_cpu_has_bug(X86_BUG_MONITOR)) + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); udelay(5); }
On Fri, Nov 8, 2024 at 2:52 PM Len Brown lenb@kernel.org wrote:
From: Len Brown len.brown@intel.com
Under some conditions, MONITOR wakeups on Lunar Lake processors can be lost, resulting in significant user-visible delays.
Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu() always sends an IPI, avoiding this potential delay.
Also update the X86_BUG_MONITOR workaround to handle the new smp_kick_mwait_play_dead() path.
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364
Cc: stable@vger.kernel.org # 6.11 Signed-off-by: Len Brown len.brown@intel.com
Overall
Reviewed-by: Rafael J. Wysocki rafael.j.wysocki@intel.com
arch/x86/kernel/cpu/intel.c | 3 ++- arch/x86/kernel/smpboot.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e7656cbef68d..aa63f5f780a0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
if (boot_cpu_has(X86_FEATURE_MWAIT) &&
(c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M)) set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 766f092dab80..910cb2d72c13 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate);
/* If MONITOR unreliable, send IPI */
if (boot_cpu_has_bug(X86_BUG_MONITOR))
__apic_send_IPI(cpu, RESCHEDULE_VECTOR);
The __apic_send_IPI() call could be wrapped into something like __native_smp_send_reschedule() to underline the analogy between this and what happens in native_smp_send_reschedule().
It is still fine as is though IMV.
udelay(5); }
--
On Fri, Nov 08, 2024 at 08:49:31AM -0500, Len Brown wrote:
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 766f092dab80..910cb2d72c13 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate);
/* If MONITOR unreliable, send IPI */
if (boot_cpu_has_bug(X86_BUG_MONITOR))
__apic_send_IPI(cpu, RESCHEDULE_VECTOR); udelay(5); }
Going over that code again, mwait_play_dead() is doing __mwait(.exc=0) with IRQs disabled.
So that IPI you're trying to send there won't do no nothing :-/
Now that comment there says MCE/NMI/SMI are still open (non-maskable etc.) so perhaps prod it on the NMI vector?
This does seem to suggest the above code path wasn't actually tested. Perhaps mark your local machine with BUG_MONITOR, remove the md->control WRITE_ONCE() and try kexec to test it?
Thomas, any other thoughts?
On Mon, Nov 11 2024 at 17:23, Peter Zijlstra wrote:
On Fri, Nov 08, 2024 at 08:49:31AM -0500, Len Brown wrote:
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 766f092dab80..910cb2d72c13 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate);
/* If MONITOR unreliable, send IPI */
if (boot_cpu_has_bug(X86_BUG_MONITOR))
__apic_send_IPI(cpu, RESCHEDULE_VECTOR); udelay(5); }
Going over that code again, mwait_play_dead() is doing __mwait(.exc=0) with IRQs disabled.
And the APIC is shut down. So it won't react to the IPI either.
So that IPI you're trying to send there won't do no nothing :-/
Now that comment there says MCE/NMI/SMI are still open (non-maskable etc.) so perhaps prod it on the NMI vector?
This does seem to suggest the above code path wasn't actually tested.
I'm not sure whether that's just a suggestion :)
Perhaps mark your local machine with BUG_MONITOR, remove the md->control WRITE_ONCE() and try kexec to test it?
Thomas, any other thoughts?
NMI should work. See exc_nmi():
if (arch_cpu_is_offline(smp_processor_id())) { if (microcode_nmi_handler_enabled()) microcode_offline_nmi_handler(); return; }
Thanks,
tglx
On Fri, Nov 08 2024 at 08:49, Len Brown wrote:
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e7656cbef68d..aa63f5f780a0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M))
This indentation is bogus.
set_cpu_bug(c, X86_BUG_MONITOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 766f092dab80..910cb2d72c13 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { /* Bring it out of mwait */ WRITE_ONCE(md->control, newstate);
/* If MONITOR unreliable, send IPI */
if (boot_cpu_has_bug(X86_BUG_MONITOR))
__apic_send_IPI(cpu, RESCHEDULE_VECTOR);
How is this supposed to work?
The local APIC of the offline CPU is shut down and only responds to INIT, NMI, SMI, and SIPI.
Even if the APIC would react to the IPI, then the offline CPU would not notice as it has interrupts disabled when it reaches mwait_play_dead().
Seriously?
Thanks,
tglx
linux-stable-mirror@lists.linaro.org