Hi,
Please find the upcoming changes in SRCU for v6.15. The changes can also be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git srcu.2025.02.05a
Regards, Boqun
Paul E. McKenney (20): srcu: Make Tiny SRCU able to operate in preemptible kernels srcu: Define SRCU_READ_FLAVOR_ALL in terms of symbols srcu: Use ->srcu_gp_seq for rcutorture reader batch srcu: Pull ->srcu_{un,}lock_count into a new srcu_ctr structure srcu: Make SRCU readers use ->srcu_ctrs for counter selection srcu: Make Tree SRCU updates independent of ->srcu_idx srcu: Force synchronization for srcu_get_delay() srcu: Rename srcu_check_read_flavor_lite() to srcu_check_read_flavor_force() srcu: Add SRCU_READ_FLAVOR_SLOWGP to flag need for synchronize_rcu() srcu: Pull pointer-to-integer conversion into __srcu_ptr_to_ctr() srcu: Pull integer-to-pointer conversion into __srcu_ctr_to_ptr() srcu: Move SRCU Tree/Tiny definitions from srcu.h srcu: Add SRCU-fast readers rcutorture: Add ability to test srcu_read_{,un}lock_fast() refscale: Add srcu_read_lock_fast() support using "srcu-fast" rcutorture: Make scenario SRCU-P use srcu_read_lock_fast() srcu: Fix srcu_read_unlock_{lite,nmisafe}() kernel-doc srcu: Document that srcu_{read_lock,down_read}() can share srcu_struct srcu: Add srcu_down_read_fast() and srcu_up_read_fast() srcu: Make SRCU-fast also be NMI-safe
include/linux/srcu.h | 102 +++++++-- include/linux/srcutiny.h | 27 ++- include/linux/srcutree.h | 98 +++++++-- kernel/rcu/rcu.h | 9 +- kernel/rcu/rcutorture.c | 11 + kernel/rcu/refscale.c | 32 ++- kernel/rcu/srcutiny.c | 6 + kernel/rcu/srcutree.c | 199 +++++++++--------- .../rcutorture/configs/rcu/SRCU-P.boot | 1 + 9 files changed, 359 insertions(+), 126 deletions(-)
From: "Paul E. McKenney" paulmck@kernel.org
Given that SRCU allows its read-side critical sections are not just preemptible, but also allow general blocking, there is not much reason to restrict Tiny SRCU to non-preemptible kernels. This commit therefore removes Tiny SRCU dependencies on non-preemptibility, primarily surrounding its interaction with rcutorture and early boot.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Ankur Arora ankur.a.arora@oracle.com Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- kernel/rcu/rcu.h | 9 ++++++--- kernel/rcu/srcutiny.c | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index feb3ac1dc5d5..2909662c805f 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -611,8 +611,6 @@ void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags, static inline bool rcu_watching_zero_in_eqs(int cpu, int *vp) { return false; } static inline unsigned long rcu_get_gp_seq(void) { return 0; } static inline unsigned long rcu_exp_batches_completed(void) { return 0; } -static inline unsigned long -srcu_batches_completed(struct srcu_struct *sp) { return 0; } static inline void rcu_force_quiescent_state(void) { } static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; } static inline void show_rcu_gp_kthreads(void) { } @@ -624,7 +622,6 @@ static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { } bool rcu_watching_zero_in_eqs(int cpu, int *vp); unsigned long rcu_get_gp_seq(void); unsigned long rcu_exp_batches_completed(void); -unsigned long srcu_batches_completed(struct srcu_struct *sp); bool rcu_check_boost_fail(unsigned long gp_state, int *cpup); void show_rcu_gp_kthreads(void); int rcu_get_gp_kthreads_prio(void); @@ -636,6 +633,12 @@ void rcu_gp_slow_register(atomic_t *rgssp); void rcu_gp_slow_unregister(atomic_t *rgssp); #endif /* #else #ifdef CONFIG_TINY_RCU */
+#ifdef CONFIG_TINY_SRCU +static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) { return 0; } +#else // #ifdef CONFIG_TINY_SRCU +unsigned long srcu_batches_completed(struct srcu_struct *sp); +#endif // #else // #ifdef CONFIG_TINY_SRCU + #ifdef CONFIG_RCU_NOCB_CPU void rcu_bind_current_to_nocb(void); #else diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 4dcbf8aa80ff..2a94f0e65606 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -20,7 +20,11 @@ #include "rcu_segcblist.h" #include "rcu.h"
+#ifndef CONFIG_TREE_RCU int rcu_scheduler_active __read_mostly; +#else // #ifndef CONFIG_TREE_RCU +extern int rcu_scheduler_active; +#endif // #else // #ifndef CONFIG_TREE_RCU static LIST_HEAD(srcu_boot_list); static bool srcu_init_done;
@@ -282,11 +286,13 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) } EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
+#ifndef CONFIG_TREE_RCU /* Lockdep diagnostics. */ void __init rcu_scheduler_starting(void) { rcu_scheduler_active = RCU_SCHEDULER_RUNNING; } +#endif // #ifndef CONFIG_TREE_RCU
/* * Queue work for srcu_struct structures with early boot callbacks.
From: "Paul E. McKenney" paulmck@kernel.org
This commit defines SRCU_READ_FLAVOR_ALL in terms of the SRCU_READ_FLAVOR_* definitions instead of a hexadecimal constant.
Suggested-by: Neeraj Upadhyay Neeraj.Upadhyay@amd.com Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index d7ba46e74f58..f6f779b9d9ff 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -47,7 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). #define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). -#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above. +#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ + SRCU_READ_FLAVOR_LITE) // All of the above.
#ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h>
From: "Paul E. McKenney" paulmck@kernel.org
This commit stops using ->srcu_idx for rcutorture's reader-batch consistency checking, using ->srcu_gp_seq instead. This is a first step towards a faster srcu_read_{,un}lock_lite() that avoids the array accesses that use ->srcu_idx.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- kernel/rcu/rcutorture.c | 2 ++ kernel/rcu/srcutree.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d26fb1d33ed9..1d2de50fb5d6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -791,6 +791,7 @@ static struct rcu_torture_ops srcu_ops = { .readunlock = srcu_torture_read_unlock, .readlock_held = torture_srcu_read_lock_held, .get_gp_seq = srcu_torture_completed, + .gp_diff = rcu_seq_diff, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, .exp_sync = srcu_torture_synchronize_expedited, @@ -834,6 +835,7 @@ static struct rcu_torture_ops srcud_ops = { .readunlock = srcu_torture_read_unlock, .readlock_held = torture_srcu_read_lock_held, .get_gp_seq = srcu_torture_completed, + .gp_diff = rcu_seq_diff, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, .exp_sync = srcu_torture_synchronize_expedited, diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index b83c74c4dcc0..e69ce9d59abf 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1675,7 +1675,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier); */ unsigned long srcu_batches_completed(struct srcu_struct *ssp) { - return READ_ONCE(ssp->srcu_idx); + return READ_ONCE(ssp->srcu_sup->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcu_batches_completed);
From: "Paul E. McKenney" paulmck@kernel.org
This commit prepares for array-index-free srcu_read_lock*() by moving the ->srcu_{un,}lock_count fields into a new srcu_ctr structure. This will permit ->srcu_index to be replaced by a per-CPU pointer to this structure.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 13 +++-- kernel/rcu/srcutree.c | 115 +++++++++++++++++++-------------------- 2 files changed, 66 insertions(+), 62 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index b17814c9d1c7..c794d599db5c 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -17,14 +17,19 @@ struct srcu_node; struct srcu_struct;
+/* One element of the srcu_data srcu_ctrs array. */ +struct srcu_ctr { + atomic_long_t srcu_locks; /* Locks per CPU. */ + atomic_long_t srcu_unlocks; /* Unlocks per CPU. */ +}; + /* * Per-CPU structure feeding into leaf srcu_node, similar in function * to rcu_node. */ struct srcu_data { /* Read-side state. */ - atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ - atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ /* Values: SRCU_READ_FLAVOR_.* */
@@ -221,7 +226,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */ + this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */ barrier(); /* Avoid leaking the critical section. */ return idx; } @@ -240,7 +245,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) { barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */ + this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); /* Z */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); }
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index e69ce9d59abf..d7ee2f345e19 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -116,8 +116,9 @@ do { \ /* * Initialize SRCU per-CPU data. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and - * srcu_read_unlock() running against them. So if the is_static parameter - * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. + * srcu_read_unlock() running against them. So if the is_static + * parameter is set, don't initialize ->srcu_ctrs[].srcu_locks and + * ->srcu_ctrs[].srcu_unlocks. */ static void init_srcu_struct_data(struct srcu_struct *ssp) { @@ -128,8 +129,6 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) * Initialize the per-CPU srcu_data array, which feeds into the * leaves of the srcu_node tree. */ - BUILD_BUG_ON(ARRAY_SIZE(sdp->srcu_lock_count) != - ARRAY_SIZE(sdp->srcu_unlock_count)); for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); @@ -429,10 +428,10 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp) }
/* - * Computes approximate total of the readers' ->srcu_lock_count[] values - * for the rank of per-CPU counters specified by idx, and returns true if - * the caller did the proper barrier (gp), and if the count of the locks - * matches that of the unlocks passed in. + * Computes approximate total of the readers' ->srcu_ctrs[].srcu_locks + * values for the rank of per-CPU counters specified by idx, and returns + * true if the caller did the proper barrier (gp), and if the count of + * the locks matches that of the unlocks passed in. */ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks) { @@ -443,7 +442,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[idx]); + sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks); if (IS_ENABLED(CONFIG_PROVE_RCU)) mask = mask | READ_ONCE(sdp->srcu_reader_flavor); } @@ -455,8 +454,8 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns }
/* - * Returns approximate total of the readers' ->srcu_unlock_count[] values - * for the rank of per-CPU counters specified by idx. + * Returns approximate total of the readers' ->srcu_ctrs[].srcu_unlocks + * values for the rank of per-CPU counters specified by idx. */ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm) { @@ -467,7 +466,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, u for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_unlock_count[idx]); + sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks); mask = mask | READ_ONCE(sdp->srcu_reader_flavor); } WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)), @@ -510,9 +509,9 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) * been no readers on this index at some point in this function. * But there might be more readers, as a task might have read * the current ->srcu_idx but not yet have incremented its CPU's - * ->srcu_lock_count[idx] counter. In fact, it is possible + * ->srcu_ctrs[idx].srcu_locks counter. In fact, it is possible * that most of the tasks have been preempted between fetching - * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there + * ->srcu_idx and incrementing ->srcu_ctrs[idx].srcu_locks. And there * could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks * in a system whose address space was fully populated with memory. * Call this quantity Nt. @@ -521,36 +520,36 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) * code for a long time. That now-preempted updater has already * flipped ->srcu_idx (possibly during the preceding grace period), * done an smp_mb() (again, possibly during the preceding grace - * period), and summed up the ->srcu_unlock_count[idx] counters. + * period), and summed up the ->srcu_ctrs[idx].srcu_unlocks counters. * How many times can a given one of the aforementioned Nt tasks - * increment the old ->srcu_idx value's ->srcu_lock_count[idx] + * increment the old ->srcu_idx value's ->srcu_ctrs[idx].srcu_locks * counter, in the absence of nesting? * * It can clearly do so once, given that it has already fetched - * the old value of ->srcu_idx and is just about to use that value - * to index its increment of ->srcu_lock_count[idx]. But as soon as - * it leaves that SRCU read-side critical section, it will increment - * ->srcu_unlock_count[idx], which must follow the updater's above - * read from that same value. Thus, as soon the reading task does - * an smp_mb() and a later fetch from ->srcu_idx, that task will be - * guaranteed to get the new index. Except that the increment of - * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the - * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock() - * is before the smp_mb(). Thus, that task might not see the new - * value of ->srcu_idx until the -second- __srcu_read_lock(), - * which in turn means that this task might well increment - * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice, - * not just once. + * the old value of ->srcu_idx and is just about to use that + * value to index its increment of ->srcu_ctrs[idx].srcu_locks. + * But as soon as it leaves that SRCU read-side critical section, + * it will increment ->srcu_ctrs[idx].srcu_unlocks, which must + * follow the updater's above read from that same value. Thus, + * as soon the reading task does an smp_mb() and a later fetch from + * ->srcu_idx, that task will be guaranteed to get the new index. + * Except that the increment of ->srcu_ctrs[idx].srcu_unlocks + * in __srcu_read_unlock() is after the smp_mb(), and the fetch + * from ->srcu_idx in __srcu_read_lock() is before the smp_mb(). + * Thus, that task might not see the new value of ->srcu_idx until + * the -second- __srcu_read_lock(), which in turn means that this + * task might well increment ->srcu_ctrs[idx].srcu_locks for the + * old value of ->srcu_idx twice, not just once. * * However, it is important to note that a given smp_mb() takes * effect not just for the task executing it, but also for any * later task running on that same CPU. * - * That is, there can be almost Nt + Nc further increments of - * ->srcu_lock_count[idx] for the old index, where Nc is the number - * of CPUs. But this is OK because the size of the task_struct - * structure limits the value of Nt and current systems limit Nc - * to a few thousand. + * That is, there can be almost Nt + Nc further increments + * of ->srcu_ctrs[idx].srcu_locks for the old index, where Nc + * is the number of CPUs. But this is OK because the size of + * the task_struct structure limits the value of Nt and current + * systems limit Nc to a few thousand. * * OK, but what about nesting? This does impose a limit on * nesting of half of the size of the task_struct structure @@ -581,10 +580,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp) for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[0]); - sum += atomic_long_read(&sdp->srcu_lock_count[1]); - sum -= atomic_long_read(&sdp->srcu_unlock_count[0]); - sum -= atomic_long_read(&sdp->srcu_unlock_count[1]); + sum += atomic_long_read(&sdp->srcu_ctrs[0].srcu_locks); + sum += atomic_long_read(&sdp->srcu_ctrs[1].srcu_locks); + sum -= atomic_long_read(&sdp->srcu_ctrs[0].srcu_unlocks); + sum -= atomic_long_read(&sdp->srcu_ctrs[1].srcu_unlocks); } return sum; } @@ -746,7 +745,7 @@ int __srcu_read_lock(struct srcu_struct *ssp) int idx;
idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); + this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -760,7 +759,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); + this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); } EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -777,7 +776,7 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
idx = READ_ONCE(ssp->srcu_idx) & 0x1; - atomic_long_inc(&sdp->srcu_lock_count[idx]); + atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks); smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -793,7 +792,7 @@ void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ - atomic_long_inc(&sdp->srcu_unlock_count[idx]); + atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_unlocks); } EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
@@ -1123,17 +1122,17 @@ static void srcu_flip(struct srcu_struct *ssp) /* * Because the flip of ->srcu_idx is executed only if the * preceding call to srcu_readers_active_idx_check() found that - * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched - * and because that summing uses atomic_long_read(), there is - * ordering due to a control dependency between that summing and - * the WRITE_ONCE() in this call to srcu_flip(). This ordering - * ensures that if this updater saw a given reader's increment from - * __srcu_read_lock(), that reader was using a value of ->srcu_idx - * from before the previous call to srcu_flip(), which should be - * quite rare. This ordering thus helps forward progress because - * the grace period could otherwise be delayed by additional - * calls to __srcu_read_lock() using that old (soon to be new) - * value of ->srcu_idx. + * the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums + * matched and because that summing uses atomic_long_read(), + * there is ordering due to a control dependency between that + * summing and the WRITE_ONCE() in this call to srcu_flip(). + * This ordering ensures that if this updater saw a given reader's + * increment from __srcu_read_lock(), that reader was using a value + * of ->srcu_idx from before the previous call to srcu_flip(), + * which should be quite rare. This ordering thus helps forward + * progress because the grace period could otherwise be delayed + * by additional calls to __srcu_read_lock() using that old (soon + * to be new) value of ->srcu_idx. * * This sum-equality check and ordering also ensures that if * a given call to __srcu_read_lock() uses the new value of @@ -1914,8 +1913,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu); - u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx])); - u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx])); + u0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_unlocks)); + u1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks));
/* * Make sure that a lock is always counted if the corresponding @@ -1923,8 +1922,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) */ smp_rmb();
- l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx])); - l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx])); + l0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_locks)); + l1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks));
c0 = l0 - u0; c1 = l1 - u1;
From: "Paul E. McKenney" paulmck@kernel.org
This commit causes SRCU readers to use ->srcu_ctrs for counter selection instead of ->srcu_idx. This takes another step towards array-indexing-free SRCU readers.
[ paulmck: Apply kernel test robot feedback. ]
Co-developed-by: Z qiang qiang.zhang1211@gmail.com Signed-off-by: Z qiang qiang.zhang1211@gmail.com Signed-off-by: Paul E. McKenney paulmck@kernel.org Tested-by: kernel test robot oliver.sang@intel.com Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 9 +++++---- kernel/rcu/srcutree.c | 23 +++++++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index c794d599db5c..1b01ced61a45 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -101,6 +101,7 @@ struct srcu_usage { */ struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ + struct srcu_ctr __percpu *srcu_ctrp; struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ struct lockdep_map dep_map; struct srcu_usage *srcu_sup; /* Update-side data. */ @@ -167,6 +168,7 @@ struct srcu_struct { #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \ { \ .sda = &pcpu_name, \ + .srcu_ctrp = &pcpu_name.srcu_ctrs[0], \ __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ }
@@ -222,13 +224,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); */ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) { - int idx; + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); - idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); /* Y */ barrier(); /* Avoid leaking the critical section. */ - return idx; + return scp - &ssp->sda->srcu_ctrs[0]; }
/* diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index d7ee2f345e19..7efde1a2344e 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -253,8 +253,10 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); ssp->srcu_sup->sda_is_static = is_static; - if (!is_static) + if (!is_static) { ssp->sda = alloc_percpu(struct srcu_data); + ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0]; + } if (!ssp->sda) goto err_free_sup; init_srcu_struct_data(ssp); @@ -742,12 +744,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor); */ int __srcu_read_lock(struct srcu_struct *ssp) { - int idx; + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_locks.counter); + this_cpu_inc(scp->srcu_locks.counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ - return idx; + return scp - &ssp->sda->srcu_ctrs[0]; } EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -772,13 +773,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); */ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) { - int idx; - struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp); + struct srcu_ctr *scp = raw_cpu_ptr(scpp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1; - atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_locks); + atomic_long_inc(&scp->srcu_locks); smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ - return idx; + return scpp - &ssp->sda->srcu_ctrs[0]; } EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
@@ -1152,6 +1152,8 @@ static void srcu_flip(struct srcu_struct *ssp) smp_mb(); /* E */ /* Pairs with B and C. */
WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter. + WRITE_ONCE(ssp->srcu_ctrp, + &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
/* * Ensure that if the updater misses an __srcu_read_unlock() @@ -2000,6 +2002,7 @@ static int srcu_module_coming(struct module *mod) ssp->sda = alloc_percpu(struct srcu_data); if (WARN_ON_ONCE(!ssp->sda)) return -ENOMEM; + ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0]; } return 0; }
From: "Paul E. McKenney" paulmck@kernel.org
This commit makes Tree SRCU updates independent of ->srcu_idx, then drop ->srcu_idx.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 1 - kernel/rcu/srcutree.c | 68 ++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 1b01ced61a45..6b7eba59f384 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -100,7 +100,6 @@ struct srcu_usage { * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - unsigned int srcu_idx; /* Current rdr array element. */ struct srcu_ctr __percpu *srcu_ctrp; struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ struct lockdep_map dep_map; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 7efde1a2344e..247bdf42fb54 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -246,7 +246,6 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); mutex_init(&ssp->srcu_sup->srcu_gp_mutex); - ssp->srcu_idx = 0; ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL; ssp->srcu_sup->srcu_barrier_seq = 0; mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); @@ -510,38 +509,39 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) * If the locks are the same as the unlocks, then there must have * been no readers on this index at some point in this function. * But there might be more readers, as a task might have read - * the current ->srcu_idx but not yet have incremented its CPU's + * the current ->srcu_ctrp but not yet have incremented its CPU's * ->srcu_ctrs[idx].srcu_locks counter. In fact, it is possible * that most of the tasks have been preempted between fetching - * ->srcu_idx and incrementing ->srcu_ctrs[idx].srcu_locks. And there - * could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks - * in a system whose address space was fully populated with memory. - * Call this quantity Nt. + * ->srcu_ctrp and incrementing ->srcu_ctrs[idx].srcu_locks. And + * there could be almost (ULONG_MAX / sizeof(struct task_struct)) + * tasks in a system whose address space was fully populated + * with memory. Call this quantity Nt. * - * So suppose that the updater is preempted at this point in the - * code for a long time. That now-preempted updater has already - * flipped ->srcu_idx (possibly during the preceding grace period), - * done an smp_mb() (again, possibly during the preceding grace - * period), and summed up the ->srcu_ctrs[idx].srcu_unlocks counters. - * How many times can a given one of the aforementioned Nt tasks - * increment the old ->srcu_idx value's ->srcu_ctrs[idx].srcu_locks - * counter, in the absence of nesting? + * So suppose that the updater is preempted at this + * point in the code for a long time. That now-preempted + * updater has already flipped ->srcu_ctrp (possibly during + * the preceding grace period), done an smp_mb() (again, + * possibly during the preceding grace period), and summed up + * the ->srcu_ctrs[idx].srcu_unlocks counters. How many times + * can a given one of the aforementioned Nt tasks increment the + * old ->srcu_ctrp value's ->srcu_ctrs[idx].srcu_locks counter, + * in the absence of nesting? * * It can clearly do so once, given that it has already fetched - * the old value of ->srcu_idx and is just about to use that + * the old value of ->srcu_ctrp and is just about to use that * value to index its increment of ->srcu_ctrs[idx].srcu_locks. * But as soon as it leaves that SRCU read-side critical section, * it will increment ->srcu_ctrs[idx].srcu_unlocks, which must - * follow the updater's above read from that same value. Thus, - * as soon the reading task does an smp_mb() and a later fetch from - * ->srcu_idx, that task will be guaranteed to get the new index. + * follow the updater's above read from that same value. Thus, + as soon the reading task does an smp_mb() and a later fetch from + * ->srcu_ctrp, that task will be guaranteed to get the new index. * Except that the increment of ->srcu_ctrs[idx].srcu_unlocks * in __srcu_read_unlock() is after the smp_mb(), and the fetch - * from ->srcu_idx in __srcu_read_lock() is before the smp_mb(). - * Thus, that task might not see the new value of ->srcu_idx until + * from ->srcu_ctrp in __srcu_read_lock() is before the smp_mb(). + * Thus, that task might not see the new value of ->srcu_ctrp until * the -second- __srcu_read_lock(), which in turn means that this * task might well increment ->srcu_ctrs[idx].srcu_locks for the - * old value of ->srcu_idx twice, not just once. + * old value of ->srcu_ctrp twice, not just once. * * However, it is important to note that a given smp_mb() takes * effect not just for the task executing it, but also for any @@ -1095,7 +1095,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, /* * Wait until all readers counted by array index idx complete, but * loop an additional time if there is an expedited grace period pending. - * The caller must ensure that ->srcu_idx is not changed while checking. + * The caller must ensure that ->srcu_ctrp is not changed while checking. */ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { @@ -1113,14 +1113,14 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) }
/* - * Increment the ->srcu_idx counter so that future SRCU readers will + * Increment the ->srcu_ctrp counter so that future SRCU readers will * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows * us to wait for pre-existing readers in a starvation-free manner. */ static void srcu_flip(struct srcu_struct *ssp) { /* - * Because the flip of ->srcu_idx is executed only if the + * Because the flip of ->srcu_ctrp is executed only if the * preceding call to srcu_readers_active_idx_check() found that * the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums * matched and because that summing uses atomic_long_read(), @@ -1128,15 +1128,15 @@ static void srcu_flip(struct srcu_struct *ssp) * summing and the WRITE_ONCE() in this call to srcu_flip(). * This ordering ensures that if this updater saw a given reader's * increment from __srcu_read_lock(), that reader was using a value - * of ->srcu_idx from before the previous call to srcu_flip(), + * of ->srcu_ctrp from before the previous call to srcu_flip(), * which should be quite rare. This ordering thus helps forward * progress because the grace period could otherwise be delayed * by additional calls to __srcu_read_lock() using that old (soon - * to be new) value of ->srcu_idx. + * to be new) value of ->srcu_ctrp. * * This sum-equality check and ordering also ensures that if * a given call to __srcu_read_lock() uses the new value of - * ->srcu_idx, this updater's earlier scans cannot have seen + * ->srcu_ctrp, this updater's earlier scans cannot have seen * that reader's increments, which is all to the good, because * this grace period need not wait on that reader. After all, * if those earlier scans had seen that reader, there would have @@ -1151,7 +1151,6 @@ static void srcu_flip(struct srcu_struct *ssp) */ smp_mb(); /* E */ /* Pairs with B and C. */
- WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter. WRITE_ONCE(ssp->srcu_ctrp, &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
@@ -1466,8 +1465,9 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * * Wait for the count to drain to zero of both indexes. To avoid the * possible starvation of synchronize_srcu(), it waits for the count of - * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first, - * and then flip the srcu_idx and wait for the count of the other index. + * the index=!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]) to drain to zero + * at first, and then flip the ->srcu_ctrp and wait for the count of the + * other index. * * Can block; must be called from process context. * @@ -1693,7 +1693,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
/* * Because readers might be delayed for an extended period after - * fetching ->srcu_idx for their index, at any point in time there + * fetching ->srcu_ctrp for their index, at any point in time there * might well be readers using both idx=0 and idx=1. We therefore * need to wait for readers to clear from both index values before * invoking a callback. @@ -1721,7 +1721,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) }
if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) { - idx = 1 ^ (ssp->srcu_idx & 1); + idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]); if (!try_check_zero(ssp, idx, 1)) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* readers present, retry later. */ @@ -1739,7 +1739,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) * SRCU read-side critical sections are normally short, * so check at least twice in quick succession after a flip. */ - idx = 1 ^ (ssp->srcu_idx & 1); + idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]); if (!try_check_zero(ssp, idx, 2)) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* readers present, retry later. */ @@ -1897,7 +1897,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state); int ss_state_idx = ss_state;
- idx = ssp->srcu_idx & 0x1; + idx = ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]; if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name)) ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1; pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
From: "Paul E. McKenney" paulmck@kernel.org
Currently, srcu_get_delay() can be called concurrently, for example, by a CPU that is the first to request a new grace period and the CPU processing the current grace period. Although concurrent access is harmless, it unnecessarily expands the state space. Additionally, all calls to srcu_get_delay() are from slow paths.
This commit therefore protects all calls to srcu_get_delay() with ssp->srcu_sup->lock, which is already held on the invocation from the srcu_funnel_gp_start() function. While in the area, this commit also adds a lockdep_assert_held() to srcu_get_delay() itself.
Reported-by: syzbot+16a19b06125a2963eaee@syzkaller.appspotmail.com Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- kernel/rcu/srcutree.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 247bdf42fb54..121dd290cae1 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -648,6 +648,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) unsigned long jbase = SRCU_INTERVAL; struct srcu_usage *sup = ssp->srcu_sup;
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); if (srcu_gp_is_expedited(ssp)) jbase = 0; if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) { @@ -675,9 +676,13 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) void cleanup_srcu_struct(struct srcu_struct *ssp) { int cpu; + unsigned long delay; struct srcu_usage *sup = ssp->srcu_sup;
- if (WARN_ON(!srcu_get_delay(ssp))) + spin_lock_irq_rcu_node(ssp->srcu_sup); + delay = srcu_get_delay(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); + if (WARN_ON(!delay)) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ @@ -1101,7 +1106,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { unsigned long curdelay;
+ spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = !srcu_get_delay(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup);
for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) @@ -1850,7 +1857,9 @@ static void process_srcu(struct work_struct *work) ssp = sup->srcu_ssp;
srcu_advance_state(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = srcu_get_delay(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); if (curdelay) { WRITE_ONCE(sup->reschedule_count, 0); } else {
From: "Paul E. McKenney" paulmck@kernel.org
This commit renames the srcu_check_read_flavor_lite() function to srcu_check_read_flavor_force() and adds a read_flavor argument in order to support an srcu_read_lock_fast() variant that is to avoid array indexing in both the lock and unlock primitives.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 2 +- include/linux/srcutiny.h | 2 +- include/linux/srcutree.h | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f6f779b9d9ff..ca00b9af7c23 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -279,7 +279,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) { int retval;
- srcu_check_read_flavor_lite(ssp); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE); retval = __srcu_read_lock_lite(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 1321da803274..b347bde1aac2 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -82,7 +82,7 @@ static inline void srcu_barrier(struct srcu_struct *ssp) }
#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) -#define srcu_check_read_flavor_lite(ssp) do { } while (0) +#define srcu_check_read_flavor_force(ssp, read_flavor) do { } while (0)
/* Defined here to avoid size increase for non-torture kernels. */ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 6b7eba59f384..e29cc57eac81 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -251,16 +251,18 @@ static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
-// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels. -static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) +// Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is +// needed only for flavors that require grace-period smp_mb() calls to be +// promoted to synchronize_rcu(). +static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor) { struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
- if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor)) return;
// Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. - __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + __srcu_check_read_flavor(ssp, read_flavor); }
// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
From: "Paul E. McKenney" paulmck@kernel.org
This commit switches from a direct test of SRCU_READ_FLAVOR_LITE to a new SRCU_READ_FLAVOR_SLOWGP macro to check for substituting synchronize_rcu() for smp_mb() in SRCU grace periods. Right now, SRCU_READ_FLAVOR_SLOWGP is exactly SRCU_READ_FLAVOR_LITE, but the addition of the _fast() flavor of SRCU will change that.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 3 +++ kernel/rcu/srcutree.c | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ca00b9af7c23..505f5bdce444 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -49,6 +49,9 @@ int init_srcu_struct(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ SRCU_READ_FLAVOR_LITE) // All of the above. +#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE + // Flavors requiring synchronize_rcu() + // instead of smp_mb().
#ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 121dd290cae1..8b5c50bc98e5 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -449,7 +449,7 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns } WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)), "Mixed reader flavors for srcu_struct at %ps.\n", ssp); - if (mask & SRCU_READ_FLAVOR_LITE && !gp) + if (mask & SRCU_READ_FLAVOR_SLOWGP && !gp) return false; return sum == unlocks; } @@ -487,7 +487,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) unsigned long unlocks;
unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm); - did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE); + did_gp = !!(rdm & SRCU_READ_FLAVOR_SLOWGP);
/* * Make sure that a lock is always counted if the corresponding @@ -1205,7 +1205,7 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
check_init_srcu_struct(ssp); /* If _lite() readers, don't do unsolicited expediting. */ - if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE) + if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_SLOWGP) return false; /* If the local srcu_data structure has callbacks, not idle. */ sdp = raw_cpu_ptr(ssp->sda);
From: "Paul E. McKenney" paulmck@kernel.org
This commit abstracts the srcu_read_lock*() pointer-to-integer conversion into a new __srcu_ptr_to_ctr(). This will be used in rcutorture for testing an srcu_read_lock_fast() that returns a pointer rather than an integer.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 9 ++++++++- kernel/rcu/srcutree.c | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index e29cc57eac81..f41bb3a55a04 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -211,6 +211,13 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
+// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that +// element's index. +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return scpp - &ssp->sda->srcu_ctrs[0]; +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Returns an index that must be passed to the matching @@ -228,7 +235,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); this_cpu_inc(scp->srcu_locks.counter); /* Y */ barrier(); /* Avoid leaking the critical section. */ - return scp - &ssp->sda->srcu_ctrs[0]; + return __srcu_ptr_to_ctr(ssp, scp); }
/* diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 8b5c50bc98e5..a91651866485 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -753,7 +753,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
this_cpu_inc(scp->srcu_locks.counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ - return scp - &ssp->sda->srcu_ctrs[0]; + return __srcu_ptr_to_ctr(ssp, scp); } EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -783,7 +783,7 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
atomic_long_inc(&scp->srcu_locks); smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ - return scpp - &ssp->sda->srcu_ctrs[0]; + return __srcu_ptr_to_ctr(ssp, scpp); } EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
From: "Paul E. McKenney" paulmck@kernel.org
This commit abstracts the srcu_read_unlock*() integer-to-pointer conversion into a new __srcu_ctr_to_ptr(). This will be used in rcutorture for testing an srcu_read_unlock_fast() that avoids array-indexing overhead by taking a pointer rather than an integer.
[ paulmck: Apply kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 9 ++++++++- kernel/rcu/srcutree.c | 6 ++---- 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index f41bb3a55a04..55fa400624bb 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -218,6 +218,13 @@ static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __ return scpp - &ssp->sda->srcu_ctrs[0]; }
+// Converts an integer to a per-CPU pointer to the corresponding +// ->srcu_ctrs[] array element. +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return &ssp->sda->srcu_ctrs[idx]; +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Returns an index that must be passed to the matching @@ -252,7 +259,7 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) { barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); /* Z */ + this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); }
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a91651866485..7a8ace83c98d 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -765,7 +765,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_ctrs[idx].srcu_unlocks.counter); + this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); } EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -794,10 +794,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); */ void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) { - struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); - smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ - atomic_long_inc(&sdp->srcu_ctrs[idx].srcu_unlocks); + atomic_long_inc(&raw_cpu_ptr(__srcu_ctr_to_ptr(ssp, idx))->srcu_unlocks); } EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
From: "Paul E. McKenney" paulmck@kernel.org
There are a couple of definitions under "#ifdef CONFIG_TINY_SRCU" in include/linux/srcu.h. There is no point in them being there, so this commit moves them to include/linux/srcutiny.h and include/linux/srcutree.c, thus eliminating that #ifdef.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 10 +--------- include/linux/srcutiny.h | 3 +++ include/linux/srcutree.h | 1 + 3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 505f5bdce444..2bd0e24e9b55 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -52,6 +52,7 @@ int init_srcu_struct(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE // Flavors requiring synchronize_rcu() // instead of smp_mb(). +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
#ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> @@ -64,15 +65,6 @@ int init_srcu_struct(struct srcu_struct *ssp); void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); void cleanup_srcu_struct(struct srcu_struct *ssp); -int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); -#ifdef CONFIG_TINY_SRCU -#define __srcu_read_lock_lite __srcu_read_lock -#define __srcu_read_unlock_lite __srcu_read_unlock -#else // #ifdef CONFIG_TINY_SRCU -int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp); -#endif // #else // #ifdef CONFIG_TINY_SRCU void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1 diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b347bde1aac2..a6194f7a7e34 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -71,6 +71,9 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) return idx; }
+#define __srcu_read_lock_lite __srcu_read_lock +#define __srcu_read_unlock_lite __srcu_read_unlock + static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { synchronize_srcu(ssp); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 55fa400624bb..ef3065c0cadc 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -207,6 +207,7 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
+int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
From: "Paul E. McKenney" paulmck@kernel.org
This commit adds srcu_read_{,un}lock_fast(), which is similar to srcu_read_{,un}lock_lite(), but avoids the array-indexing and pointer-following overhead. On a microbenchmark featuring tight loops around empty readers, this results in about a 20% speedup compared to RCU Tasks Trace on my x86 laptop.
Please note that SRCU-fast has drawbacks compared to RCU Tasks Trace, including:
o Lack of CPU stall warnings. o SRCU-fast readers permitted only where rcu_is_watching(). o A pointer-sized return value from srcu_read_lock_fast() must be passed to the corresponding srcu_read_unlock_fast(). o In the absence of readers, a synchronize_srcu() having _fast() readers will incur the latency of at least two normal RCU grace periods. o RCU Tasks Trace priority boosting could be easily added. Boosting SRCU readers is more difficult.
SRCU-fast also has a drawback compared to SRCU-lite, namely that the return value from srcu_read_lock_fast()-fast is a 64-bit pointer and that from srcu_read_lock_lite() is only a 32-bit int.
[ paulmck: Apply feedback from Akira Yokosawa. ]
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 47 ++++++++++++++++++++++++++++++++++++++-- include/linux/srcutiny.h | 22 +++++++++++++++++++ include/linux/srcutree.h | 38 ++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 2bd0e24e9b55..63bddc301423 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -47,9 +47,10 @@ int init_srcu_struct(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). #define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ - SRCU_READ_FLAVOR_LITE) // All of the above. -#define SRCU_READ_FLAVOR_SLOWGP SRCU_READ_FLAVOR_LITE + SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above. +#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // Flavors requiring synchronize_rcu() // instead of smp_mb(). void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); @@ -253,6 +254,33 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) return retval; }
+/** + * srcu_read_lock_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but for a light-weight + * smp_mb()-free reader. See srcu_read_lock() for more information. + * + * If srcu_read_lock_fast() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. Note that grace-period auto-expediting is disabled for _fast + * srcu_struct structures because auto-expedited grace periods invoke + * synchronize_rcu_expedited(), IPIs and all. + * + * Note that srcu_read_lock_fast() can be invoked only from those contexts + * where RCU is watching, that is, from contexts where it would be legal + * to invoke rcu_read_lock(). Otherwise, lockdep will complain. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + rcu_try_lock_acquire(&ssp->dep_map); + return retval; +} + /** * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -356,6 +384,21 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __srcu_read_unlock(ssp, idx); }
+/** + * srcu_read_unlock_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit a light-weight SRCU read-side critical section. + */ +static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + srcu_lock_release(&ssp->dep_map); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index a6194f7a7e34..e271f9f96bfc 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -71,6 +71,28 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) return idx; }
+struct srcu_ctr; + +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp; +} + +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return (struct srcu_ctr __percpu *)(intptr_t)idx; +} + +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp)); +} + +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); +} + #define __srcu_read_lock_lite __srcu_read_lock #define __srcu_read_unlock_lite __srcu_read_unlock
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index ef3065c0cadc..bdc467efce3a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -226,6 +226,44 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss return &ssp->sda->srcu_ctrs[idx]; }
+/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Returns a pointer that must be passed to the matching + * srcu_read_unlock_fast(). + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); + this_cpu_inc(scp->srcu_locks.counter); /* Y */ + barrier(); /* Avoid leaking the critical section. */ + return scp; +} + +/* + * Removes the count for the old reader from the appropriate + * per-CPU element of the srcu_struct. Note that this may well be a + * different CPU than that which was incremented by the corresponding + * srcu_read_lock_fast(), but it must be within the same task. + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + barrier(); /* Avoid leaking the critical section. */ + this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Returns an index that must be passed to the matching
From: "Paul E. McKenney" paulmck@kernel.org
This commit permits rcutorture to test srcu_read_{,un}lock_fast(), which is specified by the rcutorture.reader_flavor=0x8 kernel boot parameter.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- kernel/rcu/rcutorture.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 1d2de50fb5d6..1bd3eaa0b8e7 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -677,6 +677,7 @@ static void srcu_get_gp_data(int *flags, unsigned long *gp_seq) static int srcu_torture_read_lock(void) { int idx; + struct srcu_ctr __percpu *scp; int ret = 0;
if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) { @@ -694,6 +695,12 @@ static int srcu_torture_read_lock(void) WARN_ON_ONCE(idx & ~0x1); ret += idx << 2; } + if (reader_flavor & SRCU_READ_FLAVOR_FAST) { + scp = srcu_read_lock_fast(srcu_ctlp); + idx = __srcu_ptr_to_ctr(srcu_ctlp, scp); + WARN_ON_ONCE(idx & ~0x1); + ret += idx << 3; + } return ret; }
@@ -719,6 +726,8 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) static void srcu_torture_read_unlock(int idx) { WARN_ON_ONCE((reader_flavor && (idx & ~reader_flavor)) || (!reader_flavor && (idx & ~0x1))); + if (reader_flavor & SRCU_READ_FLAVOR_FAST) + srcu_read_unlock_fast(srcu_ctlp, __srcu_ctr_to_ptr(srcu_ctlp, (idx & 0x8) >> 3)); if (reader_flavor & SRCU_READ_FLAVOR_LITE) srcu_read_unlock_lite(srcu_ctlp, (idx & 0x4) >> 2); if (reader_flavor & SRCU_READ_FLAVOR_NMI)
From: "Paul E. McKenney" paulmck@kernel.org
This commit creates a new srcu-fast option for the refscale.scale_type module parameter that selects srcu_read_lock_fast() and srcu_read_unlock_fast().
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- kernel/rcu/refscale.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 1b47376acdc4..f11a7c2af778 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -216,6 +216,36 @@ static const struct ref_scale_ops srcu_ops = { .name = "srcu" };
+static void srcu_fast_ref_scale_read_section(const int nloops) +{ + int i; + struct srcu_ctr __percpu *scp; + + for (i = nloops; i >= 0; i--) { + scp = srcu_read_lock_fast(srcu_ctlp); + srcu_read_unlock_fast(srcu_ctlp, scp); + } +} + +static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl) +{ + int i; + struct srcu_ctr __percpu *scp; + + for (i = nloops; i >= 0; i--) { + scp = srcu_read_lock_fast(srcu_ctlp); + un_delay(udl, ndl); + srcu_read_unlock_fast(srcu_ctlp, scp); + } +} + +static const struct ref_scale_ops srcu_fast_ops = { + .init = rcu_sync_scale_init, + .readsection = srcu_fast_ref_scale_read_section, + .delaysection = srcu_fast_ref_scale_delay_section, + .name = "srcu-fast" +}; + static void srcu_lite_ref_scale_read_section(const int nloops) { int i; @@ -1163,7 +1193,7 @@ ref_scale_init(void) long i; int firsterr = 0; static const struct ref_scale_ops *scale_ops[] = { - &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS + &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops, &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
From: "Paul E. McKenney" paulmck@kernel.org
This commit causes the rcutorture SRCU-P scenario use the srcu_read_lock_fast() and srcu_read_unlock_fast() functions. This will cause these two functions to be regularly tested by several developers (myself included), for example, those who use torture.sh as an RCU acceptance test.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 1 + 1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 2db39f298d18..fb61703690cb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -2,3 +2,4 @@ rcutorture.torture_type=srcud rcupdate.rcu_self_test=1 rcutorture.fwd_progress=3 srcutree.big_cpu_lim=5 +rcutorture.reader_flavor=0x8
From: "Paul E. McKenney" paulmck@kernel.org
The srcu_read_unlock_lite() and srcu_read_unlock_nmisafe() both say that their idx parameters must come from srcu_read_lock(). This would be bad, because a given srcu_struct structure may be used only with one flavor of SRCU reader. This commit therefore updates the srcu_read_unlock_lite() kernel-doc header to say that its idx parameter must be obtained from srcu_read_lock_lite() and the srcu_read_unlock_nmisafe() kernel-doc header to say that its idx parameter must be obtained from srcu_read_lock_nmisafe().
Signed-off-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 63bddc301423..a0df80baaccf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -402,7 +402,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct /** * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_lite(). * * Exit a light-weight SRCU read-side critical section. */ @@ -418,7 +418,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_nmisafe(). * * Exit an SRCU read-side critical section, but in an NMI-safe manner. */
From: "Paul E. McKenney" paulmck@kernel.org
This commit adds a sentence to the srcu_down_read() function's kernel-doc header noting that it is permissible to use srcu_down_read() and srcu_read_lock() on the same srcu_struct, even concurrently.
Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Andrii Nakryiko andrii@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a0df80baaccf..317eab82a5f0 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -359,7 +359,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. * * Calls to srcu_down_read() may be nested, similar to the manner in - * which calls to down_read() may be nested. + * which calls to down_read() may be nested. The same srcu_struct may be + * used concurrently by srcu_down_read() and srcu_read_lock(). */ static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) {
From: "Paul E. McKenney" paulmck@kernel.org
A pair of matching srcu_read_lock_fast() and srcu_read_unlock_fast() invocations must take place within the same context, for example, within the same task. Otherwise, lockdep complains, as is the right thing to do for most use cases.
However, there are use cases involving tracing (for example, uretprobes) in which an SRCU reader needs to begin in one task and end in a timer handler, which might interrupt some other task. This commit therefore supplies the semaphore-like srcu_down_read_fast() and srcu_up_read_fast() functions, which act like srcu_read_lock_fast() and srcu_read_unlock_fast(), but permitting srcu_up_read_fast() to be invoked in a different context than was the matching srcu_down_read_fast().
Neither srcu_down_read_fast() nor srcu_up_read_fast() may be invoked from an NMI handler.
Reported-by: Andrii Nakryiko andrii@kernel.org Signed-off-by: Paul E. McKenney paulmck@kernel.org Cc: Alexei Starovoitov ast@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Kent Overstreet kent.overstreet@linux.dev Cc: bpf@vger.kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcu.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 317eab82a5f0..900b0d5c05f5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -281,6 +281,24 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * return retval; }
+/** + * srcu_down_read_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter a semaphore-like SRCU read-side critical section, but for + * a light-weight smp_mb()-free reader. See srcu_read_lock_fast() and + * srcu_down_read() for more information. + * + * The same srcu_struct may be used concurrently by srcu_down_read_fast() + * and srcu_read_lock_fast(). + */ +static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + return __srcu_read_lock_fast(ssp); +} + /** * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -400,6 +418,22 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct __srcu_read_unlock_fast(ssp, scp); }
+/** + * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit an SRCU read-side critical section, but not necessarily from + * the same context as the maching srcu_down_read_fast(). + */ +static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader.
From: "Paul E. McKenney" paulmck@kernel.org
BPF uses rcu_read_lock_trace() in NMI context, so srcu_read_lock_fast() must be NMI-safe if it is to have any chance of addressing RCU Tasks Trace use cases. This commit therefore causes srcu_read_lock_fast() and srcu_read_unlock_fast() to use atomic_long_inc() instead of this_cpu_inc() on architectures that support NMIs but do not have NMI-safe implementations of this_cpu_inc(). Note that both x86 and arm64 have NMI-safe implementations of this_cpu_inc(), and thus do not pay the performance penalty inherent in atomic_inc_long().
It is tempting to use this trick to fold srcu_read_lock_nmisafe() into srcu_read_lock(), but this would need careful thought, review, and performance analysis. Though those smp_mb() calls might well make performance a non-issue.
Reported-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Boqun Feng boqun.feng@gmail.com --- include/linux/srcutree.h | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bdc467efce3a..8bed7e6cc4c1 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -231,17 +231,24 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_struct. Returns a pointer that must be passed to the matching * srcu_read_unlock_fast(). * - * Note that this_cpu_inc() is an RCU read-side critical section either - * because it disables interrupts, because it is a single instruction, - * or because it is a read-modify-write atomic operation, depending on - * the whims of the architecture. + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. + * + * This means that __srcu_read_lock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). */ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); - this_cpu_inc(scp->srcu_locks.counter); /* Y */ + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_locks.counter); /* Y */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ barrier(); /* Avoid leaking the critical section. */ return scp; } @@ -252,15 +259,22 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * different CPU than that which was incremented by the corresponding * srcu_read_lock_fast(), but it must be within the same task. * - * Note that this_cpu_inc() is an RCU read-side critical section either - * because it disables interrupts, because it is a single instruction, - * or because it is a read-modify-write atomic operation, depending on - * the whims of the architecture. + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. + * + * This means that __srcu_read_unlock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). */ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); }
linux-kselftest-mirror@lists.linaro.org