The following commit has been merged into the x86/core branch of tip:
Commit-ID: 1f5e7eb7868e42227ac426c96d437117e6e06e8e
Gitweb: https://git.kernel.org/tip/1f5e7eb7868e42227ac426c96d437117e6e06e8e
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Wed, 26 Apr 2023 18:37:00 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 20 Jun 2023 14:51:46 +02:00
x86/smp: Make stop_other_cpus() more robust
Tony reported intermittent lockups on poweroff. His analysis identified the
wbinvd() in stop_this_cpu() as the culprit. This was added to ensure that
on SME enabled machines a kexec() does not leave any stale data in the
caches when switching from encrypted to non-encrypted mode or vice versa.
That wbinvd() is conditional on the SME feature bit which is read directly
from CPUID. But that readout does not check whether the CPUID leaf is
available or not. If it's not available the CPU will return the value of
the highest supported leaf instead. Depending on the content the "SME" bit
might be set or not.
That's incorrect but harmless. Making the CPUID readout conditional makes
the observed hangs go away, but it does not fix the underlying problem:
CPU0 CPU1
stop_other_cpus()
send_IPIs(REBOOT); stop_this_cpu()
while (num_online_cpus() > 1); set_online(false);
proceed... -> hang
wbinvd()
WBINVD is an expensive operation and if multiple CPUs issue it at the same
time the resulting delays are even larger.
But CPU0 already observed num_online_cpus() going down to 1 and proceeds
which causes the system to hang.
This issue exists independent of WBINVD, but the delays caused by WBINVD
make it more prominent.
Make this more robust by adding a cpumask which is initialized to the
online CPU mask before sending the IPIs and CPUs clear their bit in
stop_this_cpu() after the WBINVD completed. Check for that cpumask to
become empty in stop_other_cpus() instead of watching num_online_cpus().
The cpumask cannot plug all holes either, but it's better than a raw
counter and allows to restrict the NMI fallback IPI to be sent only the
CPUs which have not reported within the timeout window.
Fixes: 08f253ec3767 ("x86/cpu: Clear SME feature flag when not in use")
Reported-by: Tony Battersby <tonyb(a)cybernetics.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Ashok Raj <ashok.raj(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/3817d810-e0f1-8ef8-0bbd-663b919ca49b@cybernetic…
Link: https://lore.kernel.org/r/87h6r770bv.ffs@tglx
---
arch/x86/include/asm/cpu.h | 2 +-
arch/x86/kernel/process.c | 23 ++++++++++++--
arch/x86/kernel/smp.c | 62 ++++++++++++++++++++++++-------------
3 files changed, 64 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 78796b9..9ba3c3d 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -98,4 +98,6 @@ extern u64 x86_read_arch_cap_msr(void);
int intel_find_matching_signature(void *mc, unsigned int csig, int cpf);
int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
+extern struct cpumask cpus_stop_mask;
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dac41a0..05924bc 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -759,13 +759,23 @@ bool xen_set_default_idle(void)
}
#endif
+struct cpumask cpus_stop_mask;
+
void __noreturn stop_this_cpu(void *dummy)
{
+ unsigned int cpu = smp_processor_id();
+
local_irq_disable();
+
/*
- * Remove this CPU:
+ * Remove this CPU from the online mask and disable it
+ * unconditionally. This might be redundant in case that the reboot
+ * vector was handled late and stop_other_cpus() sent an NMI.
+ *
+ * According to SDM and APM NMIs can be accepted even after soft
+ * disabling the local APIC.
*/
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
@@ -783,6 +793,15 @@ void __noreturn stop_this_cpu(void *dummy)
*/
if (cpuid_eax(0x8000001f) & BIT(0))
native_wbinvd();
+
+ /*
+ * This brings a cache line back and dirties it, but
+ * native_stop_other_cpus() will overwrite cpus_stop_mask after it
+ * observed that all CPUs reported stop. This write will invalidate
+ * the related cache line on this CPU.
+ */
+ cpumask_clear_cpu(cpu, &cpus_stop_mask);
+
for (;;) {
/*
* Use native_halt() so that memory contents don't change
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 375b33e..935bc65 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -27,6 +27,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/cpu.h>
#include <asm/idtentry.h>
#include <asm/nmi.h>
#include <asm/mce.h>
@@ -146,31 +147,43 @@ static int register_stop_handler(void)
static void native_stop_other_cpus(int wait)
{
- unsigned long flags;
- unsigned long timeout;
+ unsigned int cpu = smp_processor_id();
+ unsigned long flags, timeout;
if (reboot_force)
return;
- /*
- * Use an own vector here because smp_call_function
- * does lots of things not suitable in a panic situation.
- */
+ /* Only proceed if this is the first CPU to reach this code */
+ if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
+ return;
/*
- * We start by using the REBOOT_VECTOR irq.
- * The irq is treated as a sync point to allow critical
- * regions of code on other cpus to release their spin locks
- * and re-enable irqs. Jumping straight to an NMI might
- * accidentally cause deadlocks with further shutdown/panic
- * code. By syncing, we give the cpus up to one second to
- * finish their work before we force them off with the NMI.
+ * 1) Send an IPI on the reboot vector to all other CPUs.
+ *
+ * The other CPUs should react on it after leaving critical
+ * sections and re-enabling interrupts. They might still hold
+ * locks, but there is nothing which can be done about that.
+ *
+ * 2) Wait for all other CPUs to report that they reached the
+ * HLT loop in stop_this_cpu()
+ *
+ * 3) If #2 timed out send an NMI to the CPUs which did not
+ * yet report
+ *
+ * 4) Wait for all other CPUs to report that they reached the
+ * HLT loop in stop_this_cpu()
+ *
+ * #3 can obviously race against a CPU reaching the HLT loop late.
+ * That CPU will have reported already and the "have all CPUs
+ * reached HLT" condition will be true despite the fact that the
+ * other CPU is still handling the NMI. Again, there is no
+ * protection against that as "disabled" APICs still respond to
+ * NMIs.
*/
- if (num_online_cpus() > 1) {
- /* did someone beat us here? */
- if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
- return;
+ cpumask_copy(&cpus_stop_mask, cpu_online_mask);
+ cpumask_clear_cpu(cpu, &cpus_stop_mask);
+ if (!cpumask_empty(&cpus_stop_mask)) {
/* sync above data before sending IRQ */
wmb();
@@ -183,12 +196,12 @@ static void native_stop_other_cpus(int wait)
* CPUs reach shutdown state.
*/
timeout = USEC_PER_SEC;
- while (num_online_cpus() > 1 && timeout--)
+ while (!cpumask_empty(&cpus_stop_mask) && timeout--)
udelay(1);
}
/* if the REBOOT_VECTOR didn't work, try with the NMI */
- if (num_online_cpus() > 1) {
+ if (!cpumask_empty(&cpus_stop_mask)) {
/*
* If NMI IPI is enabled, try to register the stop handler
* and send the IPI. In any case try to wait for the other
@@ -200,7 +213,8 @@ static void native_stop_other_cpus(int wait)
pr_emerg("Shutting down cpus with NMI\n");
- apic_send_IPI_allbutself(NMI_VECTOR);
+ for_each_cpu(cpu, &cpus_stop_mask)
+ apic->send_IPI(cpu, NMI_VECTOR);
}
/*
* Don't wait longer than 10 ms if the caller didn't
@@ -208,7 +222,7 @@ static void native_stop_other_cpus(int wait)
* one or more CPUs do not reach shutdown state.
*/
timeout = USEC_PER_MSEC * 10;
- while (num_online_cpus() > 1 && (wait || timeout--))
+ while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--))
udelay(1);
}
@@ -216,6 +230,12 @@ static void native_stop_other_cpus(int wait)
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
local_irq_restore(flags);
+
+ /*
+ * Ensure that the cpus_stop_mask cache lines are invalidated on
+ * the other CPUs. See comment vs. SME in stop_this_cpu().
+ */
+ cpumask_clear(&cpus_stop_mask);
}
/*
The following commit has been merged into the x86/core branch of tip:
Commit-ID: 9b040453d4440659f33dc6f0aa26af418ebfe70b
Gitweb: https://git.kernel.org/tip/9b040453d4440659f33dc6f0aa26af418ebfe70b
Author: Tony Battersby <tonyb(a)cybernetics.com>
AuthorDate: Thu, 15 Jun 2023 22:33:52 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 20 Jun 2023 14:51:46 +02:00
x86/smp: Dont access non-existing CPUID leaf
stop_this_cpu() tests CPUID leaf 0x8000001f::EAX unconditionally. Intel
CPUs return the content of the highest supported leaf when a non-existing
leaf is read, while AMD CPUs return all zeros for unsupported leafs.
So the result of the test on Intel CPUs is lottery.
While harmless it's incorrect and causes the conditional wbinvd() to be
issued where not required.
Check whether the leaf is supported before reading it.
[ tglx: Adjusted changelog ]
Fixes: 08f253ec3767 ("x86/cpu: Clear SME feature flag when not in use")
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/3817d810-e0f1-8ef8-0bbd-663b919ca49b@cybernetics.…
Link: https://lore.kernel.org/r/20230615193330.322186388@linutronix.de
---
arch/x86/kernel/process.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 05924bc..ff9b80a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -763,6 +763,7 @@ struct cpumask cpus_stop_mask;
void __noreturn stop_this_cpu(void *dummy)
{
+ struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
unsigned int cpu = smp_processor_id();
local_irq_disable();
@@ -777,7 +778,7 @@ void __noreturn stop_this_cpu(void *dummy)
*/
set_cpu_online(cpu, false);
disable_local_APIC();
- mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+ mcheck_cpu_clear(c);
/*
* Use wbinvd on processors that support SME. This provides support
@@ -791,7 +792,7 @@ void __noreturn stop_this_cpu(void *dummy)
* Test the CPUID bit directly because the machine might've cleared
* X86_FEATURE_SME due to cmdline options.
*/
- if (cpuid_eax(0x8000001f) & BIT(0))
+ if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
native_wbinvd();
/*
The following commit has been merged into the x86/core branch of tip:
Commit-ID: 2affa6d6db28855e6340b060b809c23477aa546e
Gitweb: https://git.kernel.org/tip/2affa6d6db28855e6340b060b809c23477aa546e
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Thu, 15 Jun 2023 22:33:54 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 20 Jun 2023 14:51:46 +02:00
x86/smp: Remove pointless wmb()s from native_stop_other_cpus()
The wmb()s before sending the IPIs are not synchronizing anything.
If at all then the apic IPI functions have to provide or act as appropriate
barriers.
Remove these cargo cult barriers which have no explanation of what they are
synchronizing.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230615193330.378358382@linutronix.de
---
arch/x86/kernel/smp.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 935bc65..d842875 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -184,9 +184,6 @@ static void native_stop_other_cpus(int wait)
cpumask_clear_cpu(cpu, &cpus_stop_mask);
if (!cpumask_empty(&cpus_stop_mask)) {
- /* sync above data before sending IRQ */
- wmb();
-
apic_send_IPI_allbutself(REBOOT_VECTOR);
/*
@@ -208,9 +205,6 @@ static void native_stop_other_cpus(int wait)
* CPUs to stop.
*/
if (!smp_no_nmi_ipi && !register_stop_handler()) {
- /* Sync above data before sending IRQ */
- wmb();
-
pr_emerg("Shutting down cpus with NMI\n");
for_each_cpu(cpu, &cpus_stop_mask)
The following commit has been merged into the x86/core branch of tip:
Commit-ID: f9c9987bf52f4e42e940ae217333ebb5a4c3b506
Gitweb: https://git.kernel.org/tip/f9c9987bf52f4e42e940ae217333ebb5a4c3b506
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Thu, 15 Jun 2023 22:33:55 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 20 Jun 2023 14:51:47 +02:00
x86/smp: Use dedicated cache-line for mwait_play_dead()
Monitoring idletask::thread_info::flags in mwait_play_dead() has been an
obvious choice as all what is needed is a cache line which is not written
by other CPUs.
But there is a use case where a "dead" CPU needs to be brought out of
MWAIT: kexec().
This is required as kexec() can overwrite text, pagetables, stacks and the
monitored cacheline of the original kernel. The latter causes MWAIT to
resume execution which obviously causes havoc on the kexec kernel which
results usually in triple faults.
Use a dedicated per CPU storage to prepare for that.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Ashok Raj <ashok.raj(a)intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230615193330.434553750@linutronix.de
---
arch/x86/kernel/smpboot.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 352f0ce..c5ac5d7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,6 +101,17 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+struct mwait_cpu_dead {
+ unsigned int control;
+ unsigned int status;
+};
+
+/*
+ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+ * that it's unlikely to be touched by other CPUs.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -1758,10 +1769,10 @@ EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
*/
static inline void mwait_play_dead(void)
{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
@@ -1796,13 +1807,6 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = ¤t_thread_info()->flags;
-
wbinvd();
while (1) {
@@ -1814,9 +1818,9 @@ static inline void mwait_play_dead(void)
* case where we return around the loop.
*/
mb();
- clflush(mwait_ptr);
+ clflush(md);
mb();
- __monitor(mwait_ptr, 0, 0);
+ __monitor(md, 0, 0);
mb();
__mwait(eax, 0);
The following commit has been merged into the x86/core branch of tip:
Commit-ID: d7893093a7417527c0d73c9832244e65c9d0114f
Gitweb: https://git.kernel.org/tip/d7893093a7417527c0d73c9832244e65c9d0114f
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Thu, 15 Jun 2023 22:33:57 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 20 Jun 2023 14:51:47 +02:00
x86/smp: Cure kexec() vs. mwait_play_dead() breakage
TLDR: It's a mess.
When kexec() is executed on a system with offline CPUs, which are parked in
mwait_play_dead() it can end up in a triple fault during the bootup of the
kexec kernel or cause hard to diagnose data corruption.
The reason is that kexec() eventually overwrites the previous kernel's text,
page tables, data and stack. If it writes to the cache line which is
monitored by a previously offlined CPU, MWAIT resumes execution and ends
up executing the wrong text, dereferencing overwritten page tables or
corrupting the kexec kernels data.
Cure this by bringing the offlined CPUs out of MWAIT into HLT.
Write to the monitored cache line of each offline CPU, which makes MWAIT
resume execution. The written control word tells the offlined CPUs to issue
HLT, which does not have the MWAIT problem.
That does not help, if a stray NMI, MCE or SMI hits the offlined CPUs as
those make it come out of HLT.
A follow up change will put them into INIT, which protects at least against
NMI and SMI.
Fixes: ea53069231f9 ("x86, hotplug: Use mwait to offline a processor, fix the legacy case")
Reported-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Ashok Raj <ashok.raj(a)intel.com>
Reviewed-by: Ashok Raj <ashok.raj(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230615193330.492257119@linutronix.de
---
arch/x86/include/asm/smp.h | 2 +-
arch/x86/kernel/smp.c | 5 +++-
arch/x86/kernel/smpboot.c | 59 +++++++++++++++++++++++++++++++++++++-
3 files changed, 66 insertions(+)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4e91054..d4ce5cb 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -132,6 +132,8 @@ void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void cond_wakeup_cpu0(void);
+void smp_kick_mwait_play_dead(void);
+
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d842875..174d623 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -21,6 +21,7 @@
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
+#include <linux/kexec.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
@@ -157,6 +158,10 @@ static void native_stop_other_cpus(int wait)
if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
return;
+ /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
+ if (kexec_in_progress)
+ smp_kick_mwait_play_dead();
+
/*
* 1) Send an IPI on the reboot vector to all other CPUs.
*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c5ac5d7..483df04 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,6 +53,7 @@
#include <linux/tboot.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
+#include <linux/kexec.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
@@ -106,6 +107,9 @@ struct mwait_cpu_dead {
unsigned int status;
};
+#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF
+#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD
+
/*
* Cache line aligned data for mwait_play_dead(). Separate on purpose so
* that it's unlikely to be touched by other CPUs.
@@ -173,6 +177,10 @@ static void smp_callin(void)
{
int cpuid;
+ /* Mop up eventual mwait_play_dead() wreckage */
+ this_cpu_write(mwait_cpu_dead.status, 0);
+ this_cpu_write(mwait_cpu_dead.control, 0);
+
/*
* If waken up by an INIT in an 82489DX configuration
* cpu_callout_mask guarantees we don't get here before
@@ -1807,6 +1815,10 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
+ /* Set up state for the kexec() hack below */
+ md->status = CPUDEAD_MWAIT_WAIT;
+ md->control = CPUDEAD_MWAIT_WAIT;
+
wbinvd();
while (1) {
@@ -1824,10 +1836,57 @@ static inline void mwait_play_dead(void)
mb();
__mwait(eax, 0);
+ if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
+ /*
+ * Kexec is about to happen. Don't go back into mwait() as
+ * the kexec kernel might overwrite text and data including
+ * page tables and stack. So mwait() would resume when the
+ * monitor cache line is written to and then the CPU goes
+ * south due to overwritten text, page tables and stack.
+ *
+ * Note: This does _NOT_ protect against a stray MCE, NMI,
+ * SMI. They will resume execution at the instruction
+ * following the HLT instruction and run into the problem
+ * which this is trying to prevent.
+ */
+ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
+ while(1)
+ native_halt();
+ }
+
cond_wakeup_cpu0();
}
}
+/*
+ * Kick all "offline" CPUs out of mwait on kexec(). See comment in
+ * mwait_play_dead().
+ */
+void smp_kick_mwait_play_dead(void)
+{
+ u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT;
+ struct mwait_cpu_dead *md;
+ unsigned int cpu, i;
+
+ for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) {
+ md = per_cpu_ptr(&mwait_cpu_dead, cpu);
+
+ /* Does it sit in mwait_play_dead() ? */
+ if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT)
+ continue;
+
+ /* Wait up to 5ms */
+ for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) {
+ /* Bring it out of mwait */
+ WRITE_ONCE(md->control, newstate);
+ udelay(5);
+ }
+
+ if (READ_ONCE(md->status) != newstate)
+ pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu);
+ }
+}
+
void __noreturn hlt_play_dead(void)
{
if (__this_cpu_read(cpu_info.x86) >= 4)
The errata sheets for both ksz9477 and ksz9567 begin with
IMPORTANT NOTE
Multiple errata workarounds in this document call for changing PHY
registers for each PHY port. PHY registers 0x0 to 0x1F are in the
address range 0xN100 to 0xN13F, while indirect (MMD) PHY registers
are accessed via the PHY MMD Setup Register and the PHY MMD Data
Register.
Before configuring the PHY MMD registers, it is necessary to set the
PHY to 100 Mbps speed with auto-negotiation disabled by writing to
register 0xN100-0xN101. After writing the MMD registers, and after
all errata workarounds that involve PHY register settings, write
register 0xN100-0xN101 again to enable and restart auto-negotiation.
Without that explicit auto-neg restart, we do sometimes have problems
establishing link.
Rather than writing back the hardcoded 0x1340 value the errata sheet
suggests (which likely just corresponds to the most common strap
configuration), restore the original value, setting the
PORT_AUTO_NEG_RESTART bit if PORT_AUTO_NEG_ENABLE is set.
Fixes: 1fc33199185d ("net: dsa: microchip: Add PHY errata workarounds")
Cc: stable(a)vger.kernel.org
Signed-off-by: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
---
While I do believe this is a fix, I don't think it's post-rc7
material, hence targeting net-next with cc stable.
drivers/net/dsa/microchip/ksz9477.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index bf13d47c26cf..9a712ea71ee7 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -902,6 +902,16 @@ static void ksz9477_port_mmd_write(struct ksz_device *dev, int port,
static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
{
+ u16 cr;
+
+ /* Errata document says the PHY must be configured to 100Mbps
+ * with auto-neg disabled before configuring the PHY MMD
+ * registers.
+ */
+ ksz_pread16(dev, port, REG_PORT_PHY_CTRL, &cr);
+ ksz_pwrite16(dev, port, REG_PORT_PHY_CTRL,
+ PORT_SPEED_100MBIT | PORT_FULL_DUPLEX);
+
/* Apply PHY settings to address errata listed in
* KSZ9477, KSZ9897, KSZ9896, KSZ9567, KSZ8565
* Silicon Errata and Data Sheet Clarification documents:
@@ -943,6 +953,13 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
ksz9477_port_mmd_write(dev, port, 0x1c, 0x1d, 0xe7ff);
ksz9477_port_mmd_write(dev, port, 0x1c, 0x1e, 0xefff);
ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee);
+
+ /* Restore PHY CTRL register, restart auto-negotiation if
+ * enabled in the original value.
+ */
+ if (cr & PORT_AUTO_NEG_ENABLE)
+ cr |= PORT_AUTO_NEG_RESTART;
+ ksz_pwrite16(dev, port, REG_PORT_PHY_CTRL, cr);
}
void ksz9477_get_caps(struct ksz_device *dev, int port,
--
2.37.2
On Sun 2023-06-18 07:43:10, Manuel Lauss wrote:
> On Fri, Jun 16, 2023 at 9:33 PM Pavel Machek <pavel(a)denx.de> wrote:
>
> > Hi!
> >
> > > From: Manuel Lauss <manuel.lauss(a)gmail.com>
> > >
> > > [ Upstream commit 2d645604f69f3a772d58ead702f9a8e84ab2b342 ]
> > >
> > > Various fixes for the Au1200/Au1550/Au1300 DBDMA2 code:
> > >
> > > - skip cache invalidation if chip has working coherency circuitry.
> > > - invalidate KSEG0-portion of the (physical) data address.
> > > - force the dma channel doorbell write out to bus immediately with
> > > a sync.
> > >
> > > Signed-off-by: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
> > > Signed-off-by: Sasha Levin <sashal(a)kernel.org>
> >
> > I believe author's signoff is missing here.
> >
>
> As the author, I say this patch should not be applied to 4.xx at all. Same
> for my other 2 MIPS patches.
Thanks for info, where is the threshold, do we need them for 5.10?
Sasha, please drop.
Best regards,
Pavel
--
DENX Software Engineering GmbH, Managing Director: Erika Unter
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
commit e18eb8783ec4949adebc7d7b0fdb65f65bfeefd9 upstream.
Currently the tracing_reset_all_online_cpus() requires the
trace_types_lock held. But only one caller of this function actually has
that lock held before calling it, and the other just takes the lock so
that it can call it. More users of this function is needed where the lock
is not held.
Add a tracing_reset_all_online_cpus_unlocked() function for the one use
case that calls it without being held, and also add a lockdep_assert to
make sure it is held when called.
Then have tracing_reset_all_online_cpus() take the lock internally, such
that callers do not need to worry about taking it.
Link: https://lkml.kernel.org/r/20221123192741.658273220@goodmis.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Zheng Yejian <zhengyejian1(a)huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
[Refers to commit message of 1603feac154ff38514e8354e3079a455eb4801e2,
this patch is pre-depended, and tracing_reset_all_online_cpus() should
be called after trace_types_lock is held as its comment describes.]
Fixes: 1603feac154f ("tracing: Free buffers when a used dynamic event is removed")
Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com>
---
kernel/trace/trace.c | 11 ++++++++++-
kernel/trace/trace.h | 1 +
kernel/trace/trace_events.c | 2 +-
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d068124815bc..219cd2c81936 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1931,10 +1931,12 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
}
/* Must have trace_types_lock held */
-void tracing_reset_all_online_cpus(void)
+void tracing_reset_all_online_cpus_unlocked(void)
{
struct trace_array *tr;
+ lockdep_assert_held(&trace_types_lock);
+
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (!tr->clear_trace)
continue;
@@ -1946,6 +1948,13 @@ void tracing_reset_all_online_cpus(void)
}
}
+void tracing_reset_all_online_cpus(void)
+{
+ mutex_lock(&trace_types_lock);
+ tracing_reset_all_online_cpus_unlocked();
+ mutex_unlock(&trace_types_lock);
+}
+
/*
* The tgid_map array maps from pid to tgid; i.e. the value stored at index i
* is the tgid last observed corresponding to pid=i.
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f2ff39353e03..edc17a640ab3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -677,6 +677,7 @@ int tracing_is_enabled(void);
void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
+void tracing_reset_all_online_cpus_unlocked(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
int tracing_open_generic_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8f2cbc9ebb6e..a0675ecc8142 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2440,7 +2440,7 @@ static void trace_module_remove_events(struct module *mod)
* over from this module may be passed to the new module events and
* unexpected results may occur.
*/
- tracing_reset_all_online_cpus();
+ tracing_reset_all_online_cpus_unlocked();
}
static int trace_module_notify(struct notifier_block *self,
--
2.25.1