From: Steven Rostedt <rostedt@goodmis.org>
raw_spin_locks can be traced by lockdep or tracing itself. Atomic64 operations can be used in the tracing infrastructure. When an architecture does not have true atomic64 operations it can use the generic version that disables interrupts and uses spin_locks.
The tracing ring buffer code uses atomic64 operations for the time keeping. But because some architectures use the default operations, the locking inside the atomic operations can cause an infinite recursion.
As atomic64 is an architecture specific operation, it should not be using raw_spin_locks() but instead arch_spin_locks, as that is the purpose of arch_spin_locks: to be used in architecture specific implementations of generic infrastructure like atomic64 operations.
Cc: stable@vger.kernel.org
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andreas Larsson <andreas@gaisler.com>
Link: https://lore.kernel.org/20250120235721.574973242@goodmis.org
Fixes: c84897c0ff592 ("ring-buffer: Remove 32bit timestamp logic")
Closes: https://lore.kernel.org/all/86fb4f86-a0e4-45a2-a2df-3154acc4f086@gaisler.com...
Reported-by: Ludwig Rydberg <ludwig.rydberg@gaisler.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 lib/atomic64.c | 78 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 30 deletions(-)
diff --git a/lib/atomic64.c b/lib/atomic64.c
index caf895789a1e..1a72bba36d24 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -25,15 +25,15 @@
  * Ensure each lock is in a separate cacheline.
  */
 static union {
-        raw_spinlock_t lock;
+        arch_spinlock_t lock;
         char pad[L1_CACHE_BYTES];
 } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
         [0 ... (NR_LOCKS - 1)] = {
-                .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+                .lock = __ARCH_SPIN_LOCK_UNLOCKED,
         },
 };

-static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+static inline arch_spinlock_t *lock_addr(const atomic64_t *v)
 {
         unsigned long addr = (unsigned long) v;

@@ -45,12 +45,14 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
 s64 generic_atomic64_read(const atomic64_t *v)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         val = v->counter;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);
         return val;
 }
 EXPORT_SYMBOL(generic_atomic64_read);
@@ -58,11 +60,13 @@ EXPORT_SYMBOL(generic_atomic64_read);
 void generic_atomic64_set(atomic64_t *v, s64 i)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         v->counter = i;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);
 }
 EXPORT_SYMBOL(generic_atomic64_set);

@@ -70,11 +74,13 @@ EXPORT_SYMBOL(generic_atomic64_set);
 void generic_atomic64_##op(s64 a, atomic64_t *v)                \
 {                                                               \
         unsigned long flags;                                    \
-        raw_spinlock_t *lock = lock_addr(v);                    \
+        arch_spinlock_t *lock = lock_addr(v);                   \
                                                                 \
-        raw_spin_lock_irqsave(lock, flags);                     \
+        local_irq_save(flags);                                  \
+        arch_spin_lock(lock);                                   \
         v->counter c_op a;                                      \
-        raw_spin_unlock_irqrestore(lock, flags);                \
+        arch_spin_unlock(lock);                                 \
+        local_irq_restore(flags);                               \
 }                                                               \
 EXPORT_SYMBOL(generic_atomic64_##op);

@@ -82,12 +88,14 @@ EXPORT_SYMBOL(generic_atomic64_##op);
 s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v)        \
 {                                                               \
         unsigned long flags;                                    \
-        raw_spinlock_t *lock = lock_addr(v);                    \
+        arch_spinlock_t *lock = lock_addr(v);                   \
         s64 val;                                                \
                                                                 \
-        raw_spin_lock_irqsave(lock, flags);                     \
+        local_irq_save(flags);                                  \
+        arch_spin_lock(lock);                                   \
         val = (v->counter c_op a);                              \
-        raw_spin_unlock_irqrestore(lock, flags);                \
+        arch_spin_unlock(lock);                                 \
+        local_irq_restore(flags);                               \
         return val;                                             \
 }                                                               \
 EXPORT_SYMBOL(generic_atomic64_##op##_return);
@@ -96,13 +104,15 @@ EXPORT_SYMBOL(generic_atomic64_##op##_return);
 s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v)           \
 {                                                               \
         unsigned long flags;                                    \
-        raw_spinlock_t *lock = lock_addr(v);                    \
+        arch_spinlock_t *lock = lock_addr(v);                   \
         s64 val;                                                \
                                                                 \
-        raw_spin_lock_irqsave(lock, flags);                     \
+        local_irq_save(flags);                                  \
+        arch_spin_lock(lock);                                   \
         val = v->counter;                                       \
         v->counter c_op a;                                      \
-        raw_spin_unlock_irqrestore(lock, flags);                \
+        arch_spin_unlock(lock);                                 \
+        local_irq_restore(flags);                               \
         return val;                                             \
 }                                                               \
 EXPORT_SYMBOL(generic_atomic64_fetch_##op);
@@ -131,14 +141,16 @@ ATOMIC64_OPS(xor, ^=)
 s64 generic_atomic64_dec_if_positive(atomic64_t *v)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         val = v->counter - 1;
         if (val >= 0)
                 v->counter = val;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);
         return val;
 }
 EXPORT_SYMBOL(generic_atomic64_dec_if_positive);
@@ -146,14 +158,16 @@ EXPORT_SYMBOL(generic_atomic64_dec_if_positive);
 s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         val = v->counter;
         if (val == o)
                 v->counter = n;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);
         return val;
 }
 EXPORT_SYMBOL(generic_atomic64_cmpxchg);
@@ -161,13 +175,15 @@ EXPORT_SYMBOL(generic_atomic64_cmpxchg);
 s64 generic_atomic64_xchg(atomic64_t *v, s64 new)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         val = v->counter;
         v->counter = new;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);
         return val;
 }
 EXPORT_SYMBOL(generic_atomic64_xchg);
@@ -175,14 +191,16 @@ EXPORT_SYMBOL(generic_atomic64_xchg);
 s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
         val = v->counter;
         if (val != u)
                 v->counter += a;
-        raw_spin_unlock_irqrestore(lock, flags);
+        arch_spin_unlock(lock);
+        local_irq_restore(flags);

         return val;
 }
On Tue, Jan 21, 2025 at 03:19:44PM -0500, Steven Rostedt wrote:
From: Steven Rostedt <rostedt@goodmis.org>
raw_spin_locks can be traced by lockdep or tracing itself. Atomic64 operations can be used in the tracing infrastructure. When an architecture does not have true atomic64 operations it can use the generic version that disables interrupts and uses spin_locks.
The tracing ring buffer code uses atomic64 operations for the time keeping. But because some architectures use the default operations, the locking inside the atomic operations can cause an infinite recursion.
As atomic64 is an architecture specific operation, it should not
used in generic code :-)
be using raw_spin_locks() but instead arch_spin_locks, as that is the purpose of arch_spin_locks: to be used in architecture specific implementations of generic infrastructure like atomic64 operations.
Urgh.. this is horrible. This is why you shouldn't be using atomic64 in generic code much :/
Why not just drop support for those crummy archs? Or drop whatever trace feature depends on this.
 s64 generic_atomic64_read(const atomic64_t *v)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
Note that this is not an equivalent change. It's probably sufficient, but at the very least the Changelog should call out what went missing and how that is okay.
On Wed, 22 Jan 2025 11:14:57 +0100 Peter Zijlstra <peterz@infradead.org> wrote:
On Tue, Jan 21, 2025 at 03:19:44PM -0500, Steven Rostedt wrote:
From: Steven Rostedt <rostedt@goodmis.org>
raw_spin_locks can be traced by lockdep or tracing itself. Atomic64 operations can be used in the tracing infrastructure. When an architecture does not have true atomic64 operations it can use the generic version that disables interrupts and uses spin_locks.
The tracing ring buffer code uses atomic64 operations for the time keeping. But because some architectures use the default operations, the locking inside the atomic operations can cause an infinite recursion.
As atomic64 is an architecture specific operation, it should not
used in generic code :-)
Yes, but the atomic64 implementation is architecture specific. I could change that to be:
"As atomic64 implementation is architecture specific, it should not"
be using raw_spin_locks() but instead arch_spin_locks, as that is the purpose of arch_spin_locks: to be used in architecture specific implementations of generic infrastructure like atomic64 operations.
Urgh.. this is horrible. This is why you shouldn't be using atomic64 in generic code much :/
Why not just drop support for those crummy archs? Or drop whatever trace feature depends on this.
Can't, that would be a regression. Here's the history. The timestamps of events are related to each other, as one event only holds the delta from the previous event (yeah, this causes issues, but it was recommended to do it this way when it was created, and it can't change now). And because the ring buffer is lockless and a writer can be preempted by interrupts and NMIs that inject their own timestamps, it used to be that an interrupted event would just get a zero delta. If an interrupt came in while an event was being written and it created events of its own, all its events would have the same timestamp as the event it interrupted.
But this caused issues due to not being able to see timings of events from interrupts that interrupted an event in progress.
I fixed this, but that required doing a 64-bit cmpxchg on the timestamp when the race occurred. I originally did not use atomic64; instead, for 32-bit architectures, the timestamp was broken into multiple 32-bit words, with special logic to try to keep them in sync when this race occurred. But that started becoming too complex with some corner cases, so I decided to simply let these 32-bit architectures use atomic64. That worked fine for architectures that have 64-bit atomics and do not rely on spin locks.
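Roughly, the pattern is something like this (just an illustrative sketch, not the actual ring buffer code; the names here are made up):

	#include <linux/atomic.h>
	#include <linux/types.h>

	/* Illustrative only: publish a new write stamp unless an
	 * interrupting event already replaced it. */
	static atomic64_t write_stamp;

	static bool update_write_stamp(s64 expected, s64 new_ts)
	{
		/* Returns false if an interrupt/NMI injected its own
		 * timestamp between reading 'expected' and getting here. */
		return atomic64_cmpxchg(&write_stamp, expected, new_ts) == expected;
	}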
Then I started getting reports of the tracing system causing deadlocks. That is because raw_spin_lock() is traced. And it should be, as locks do cause issues and tracing them can help debug those issues. Lockdep and tracing both use arch_spin_lock() so that they don't recurse into themselves. Even RCU uses it. So I don't see why there would be any issue with the atomic64 implementation using it, as it is an even more basic operation than RCU.
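The pattern in question is basically this (illustrative sketch only; the lock and function names are made up):

	#include <linux/irqflags.h>
	#include <linux/spinlock.h>

	/* arch_spin_lock() has no lockdep or tracepoint hooks, so taking
	 * it from inside the tracer cannot recurse back into the tracer. */
	static arch_spinlock_t tracer_lock = __ARCH_SPIN_LOCK_UNLOCKED;

	static void tracer_private_update(void)
	{
		unsigned long flags;

		local_irq_save(flags);          /* irqs off also blocks preemption */
		arch_spin_lock(&tracer_lock);   /* bare lock, never traced */
		/* ... touch tracer-private state ... */
		arch_spin_unlock(&tracer_lock);
		local_irq_restore(flags);
	}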
 s64 generic_atomic64_read(const atomic64_t *v)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
Note that this is not an equivalent change. It's probably sufficient, but at the very least the Changelog should call out what went missing and how that is okay.
What exactly is the difference here that you are talking about? I know that raw_spin_lock_irqsave() has lots of different variants depending on the config options, but I'm not sure which one you mean. Is it the fact that you can't do the different variants with this?
Or is it because it's not checked by lockdep? Hmm, I mentioned that in the cover letter, but I failed to mention it here in this change log. I can definitely add that, if that's what you are referring to.
-- Steve
On Wed, Jan 22, 2025 at 10:55:17AM -0500, Steven Rostedt wrote:
 s64 generic_atomic64_read(const atomic64_t *v)
 {
         unsigned long flags;
-        raw_spinlock_t *lock = lock_addr(v);
+        arch_spinlock_t *lock = lock_addr(v);
         s64 val;

-        raw_spin_lock_irqsave(lock, flags);
+        local_irq_save(flags);
+        arch_spin_lock(lock);
Note that this is not an equivalent change. It's probably sufficient, but at the very least the Changelog should call out what went missing and how that is okay.
What exactly is the difference here that you are talking about? I know that raw_spin_lock_irqsave() has lots of different variants depending on the config options, but I'm not sure which you are talking about? Is it the fact that you can't do the different variants with this?
If I followed the maze right, then I get something like:
	raw_spin_lock_irqsave(lock, flags)
	  local_irq_save(flags);
	  preempt_disable();
	  arch_spin_lock(lock);
	  mmiowb_spin_lock();
And here you leave out that preempt_disable() and mmiowb stuff. The former is fine because local_irq_save() already makes things non-preemptible and there are no irq-state games. The mmiowb thing is fine because nothing inside this critical section cares about mmio.
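IOW, roughly this (assuming a normal !PREEMPT_RT, non-debug build; helper names made up for illustration):

	#include <linux/spinlock.h>

	/* before: one call does irq-off + preempt_disable() +
	 * arch_spin_lock() + the mmiowb accounting */
	static void demo_before(raw_spinlock_t *lock)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(lock, flags);
		/* ... critical section ... */
		raw_spin_unlock_irqrestore(lock, flags);
	}

	/* after: open coded, minus preempt_disable() (irqs-off already
	 * blocks preemption) and minus mmiowb (no mmio in these sections) */
	static void demo_after(arch_spinlock_t *lock)
	{
		unsigned long flags;

		local_irq_save(flags);
		arch_spin_lock(lock);
		/* ... critical section ... */
		arch_spin_unlock(lock);
		local_irq_restore(flags);
	}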
On Wed, 22 Jan 2025 18:57:01 +0100 Peter Zijlstra <peterz@infradead.org> wrote:
If I followed the maze right, then I get something like:
	raw_spin_lock_irqsave(lock, flags)
	  local_irq_save(flags);
	  preempt_disable();
	  arch_spin_lock(lock);
	  mmiowb_spin_lock();
And here you leave out that preempt_disable() and mmiowb stuff. The former is fine because local_irq_save() already makes things non-preemptible and there are no irq-state games. The mmiowb thing is fine because nothing inside this critical section cares about mmio.
Ah, yeah. OK, I don't plan on adding the preempt_disable() either as again, this is really just an emulation of atomic64 for architectures that do not support it.
I'll resend this with an updated change log.
Thanks for the review.
-- Steve