Hi, unfortunately, one of my tests showed the following crash after 10 minutes of running the TREE01 scenario (even though TREE04 got fixed). Let us hold off on these 3 patches. I need to be sure there's absolutely no new issue introduced, so there is more work to do.
Thank you!
[ 667.823113] CPU: 2 PID: 27 Comm: migration/2 Not tainted 5.15.126-rc1+ #26 [ 667.904999] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 668.063061] Stopper: multi_cpu_stop+0x0/0x170 <- stop_machine_from_inactive_cpu+0x118/0x170 [ 668.179708] RIP: 0010:multi_cpu_stop+0x150/0x170 [ 668.231838] Code: 35 49 dd a1 01 48 c7 c7 60 51 95 bc 49 c7 c6 60 51 95 bc e8 62 c0 31 00 39 44 24 04 41 0f 94 c7 e9 ed fe ff ff e8 10 82 fc ff <48> 8b 0d 89 69 0e 45 8b 74 24 04 48 c7 c7 6d c4 35 bc 48 29 c8 48 [ 668.563146] RSP: 0000:ffffb28f0023be68 EFLAGS: 00010216 [ 668.646854] RAX: 0000009b1b04be30 RBX: 0000000000000001 RCX: 0000000000000017 [ 668.733241] RDX: 00000ef798000000 RSI: 00000000000e4546 RDI: 0001d5f268800000 [ 668.851846] RBP: ffffb28f000c7e90 R08: 0000009c17aaf41a R09: 7fffffffffffffff [ 668.966445] R10: 0000009aa37f6a00 R11: 00000000012679f7 R12: 000000000001e7c0 [ 669.056865] R13: 0000000000000002 R14: ffff8ec49ffb4f00 R15: ffffb28f000c7e01 [ 669.172537] FS: 0000000000000000(0000) GS:ffff8ec49ea80000(0000) knlGS:0000000000000000 [ 669.318440] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 669.395012] CR2: 000000000001e7c0 CR3: 0000000019e0c000 CR4: 00000000000006e0 [ 669.505733] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 669.588303] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 669.702863] Call Trace: [ 669.742668] <TASK> [ 669.789840] ? __die_body.cold+0x1a/0x1f [ 669.836098] ? page_fault_oops+0xcd/0x210 [ 669.922700] ? exc_page_fault+0x60/0x140 [ 669.968411] ? asm_exc_page_fault+0x22/0x30 [ 670.012394] ? multi_cpu_stop+0x150/0x170 [ 670.115211] ? multi_cpu_stop+0x150/0x170 [ 670.153099] ? stop_machine_yield+0x10/0x10 [ 670.236328] cpu_stopper_thread+0x85/0x130 [ 670.288173] smpboot_thread_fn+0x183/0x220 [ 670.398452] ? smpboot_register_percpu_thread+0xd0/0xd0 [ 670.482244] kthread+0x12d/0x160 [ 670.527783] ? 
set_kthread_struct+0x40/0x40 [ 670.606296] ret_from_fork+0x22/0x30 [ 670.651658] </TASK> [ 670.695138] Modules linked in: [ 670.740198] CR2: 000000000001e7c0
On Thu, Aug 10, 2023 at 6:31 PM Joel Fernandes (Google) joel@joelfernandes.org wrote:
From: Frederic Weisbecker frederic@kernel.org
[ Upstream commit a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd ]
tick: Detect and fix jiffies update stall
In some rare cases, the timekeeper CPU may be delaying its jiffies update duty for a while. Known causes include:
The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ or MULTI_STOP_RUN state. Disabled interrupts prevent timekeeping updates while waiting for the target CPU to complete its stop_machine() callback.
The timekeeper vcpu has VMEXIT'ed for a long while due to some overload on the host.
Detect and fix these situations with emergency timekeeping catchups.
Original-patch-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Cc: Thomas Gleixner tglx@linutronix.de Signed-off-by: Joel Fernandes (Google) joel@joelfernandes.org
kernel/time/tick-sched.c | 17 +++++++++++++++++ kernel/time/tick-sched.h | 4 ++++ 2 files changed, 21 insertions(+)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f42d0776bc84..7701c720dc1f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -180,6 +180,8 @@ static ktime_t tick_init_jiffy_update(void) return period; }
+#define MAX_STALLED_JIFFIES 5
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -207,6 +209,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now);
/*
* If jiffies update stalled for too long (timekeeper in stop_machine()
* or VMEXIT'ed for several msecs), force an update.
*/
if (ts->last_tick_jiffies != jiffies) {
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
} else {
if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
tick_do_update_jiffies64(now);
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
}
}
if (ts->inidle) ts->got_idle_tick = 1;
} diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index d952ae393423..504649513399 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode {
- @timer_expires_base: Base time clock monotonic for @timer_expires
- @next_timer: Expiry time of next expiring timer for debugging purpose only
- @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
- @last_tick_jiffies: Value of jiffies seen on last tick
- @stalled_jiffies: Number of stalled jiffies detected across ticks
*/
struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask;
unsigned long last_tick_jiffies;
unsigned int stalled_jiffies;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
2.41.0.640.ga95def55d0-goog