From: "Uladzislau Rezki (Sony)" urezki@gmail.com
From: Uladzislau Rezki urezki@gmail.com
commit 3c5d61ae919cc377c71118ccc76fa6e8518023f8 upstream.
Add a kvfree_rcu_barrier() function. It waits until all in-flight pointers are freed over the RCU machinery. It does not wait for any GP to complete and is within its rights to return immediately if there are no outstanding pointers.

This function is useful when there is a need to guarantee that memory is fully freed before destroying memory caches, for example, when unloading a kernel module.
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/rcutiny.h |   5 ++
 include/linux/rcutree.h |   1 +
 kernel/rcu/tree.c       | 109 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 107 insertions(+), 8 deletions(-)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d9ac7b136aea..522123050ff8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
 	kvfree(ptr);
 }
 
+static inline void kvfree_rcu_barrier(void)
+{
+	rcu_barrier();
+}
+
 #ifdef CONFIG_KASAN_GENERIC
 void kvfree_call_rcu(struct rcu_head *head, void *ptr);
 #else
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 254244202ea9..58e7db80f3a8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void)
 
 void synchronize_rcu_expedited(void);
 void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+void kvfree_rcu_barrier(void);
 
 void rcu_barrier(void);
 void rcu_momentary_dyntick_idle(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e641cc681901..be00aac5f4e7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3584,18 +3584,15 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
 }
 
 /*
- * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ * Return: %true if a work is queued, %false otherwise.
  */
-static void kfree_rcu_monitor(struct work_struct *work)
+static bool
+kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
 {
-	struct kfree_rcu_cpu *krcp = container_of(work,
-		struct kfree_rcu_cpu, monitor_work.work);
 	unsigned long flags;
+	bool queued = false;
 	int i, j;
 
-	// Drain ready for reclaim.
-	kvfree_rcu_drain_ready(krcp);
-
 	raw_spin_lock_irqsave(&krcp->lock, flags);
 
 	// Attempt to start a new batch.
@@ -3634,11 +3631,27 @@ static void kfree_rcu_monitor(struct work_struct *work)
 			// be that the work is in the pending state when
 			// channels have been detached following by each
 			// other.
-			queue_rcu_work(system_wq, &krwp->rcu_work);
+			queued = queue_rcu_work(system_wq, &krwp->rcu_work);
 		}
 	}
 
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
+	return queued;
+}
+
+/*
+ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ */
+static void kfree_rcu_monitor(struct work_struct *work)
+{
+	struct kfree_rcu_cpu *krcp = container_of(work,
+		struct kfree_rcu_cpu, monitor_work.work);
+
+	// Drain ready for reclaim.
+	kvfree_rcu_drain_ready(krcp);
+
+	// Queue a batch for a rest.
+	kvfree_rcu_queue_batch(krcp);
 
 	// If there is nothing to detach, it means that our job is
 	// successfully done here. In case of having at least one
@@ -3859,6 +3872,86 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
+/**
+ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+ *
+ * Note that a single argument of kvfree_rcu() call has a slow path that
+ * triggers synchronize_rcu() following by freeing a pointer. It is done
+ * before the return from the function. Therefore for any single-argument
+ * call that will result in a kfree() to a cache that is to be destroyed
+ * during module exit, it is developer's responsibility to ensure that all
+ * such calls have returned before the call to kmem_cache_destroy().
+ */
+void kvfree_rcu_barrier(void)
+{
+	struct kfree_rcu_cpu_work *krwp;
+	struct kfree_rcu_cpu *krcp;
+	bool queued;
+	int i, cpu;
+
+	/*
+	 * Firstly we detach objects and queue them over an RCU-batch
+	 * for all CPUs. Finally queued works are flushed for each CPU.
+	 *
+	 * Please note. If there are outstanding batches for a particular
+	 * CPU, those have to be finished first following by queuing a new.
+	 */
+	for_each_possible_cpu(cpu) {
+		krcp = per_cpu_ptr(&krc, cpu);
+
+		/*
+		 * Check if this CPU has any objects which have been queued for a
+		 * new GP completion. If not(means nothing to detach), we are done
+		 * with it. If any batch is pending/running for this "krcp", below
+		 * per-cpu flush_rcu_work() waits its completion(see last step).
+		 */
+		if (!need_offload_krc(krcp))
+			continue;
+
+		while (1) {
+			/*
+			 * If we are not able to queue a new RCU work it means:
+			 * - batches for this CPU are still in flight which should
+			 *   be flushed first and then repeat;
+			 * - no objects to detach, because of concurrency.
+			 */
+			queued = kvfree_rcu_queue_batch(krcp);
+
+			/*
+			 * Bail out, if there is no need to offload this "krcp"
+			 * anymore. As noted earlier it can run concurrently.
+			 */
+			if (queued || !need_offload_krc(krcp))
+				break;
+
+			/* There are ongoing batches. */
+			for (i = 0; i < KFREE_N_BATCHES; i++) {
+				krwp = &(krcp->krw_arr[i]);
+				flush_rcu_work(&krwp->rcu_work);
+			}
+		}
+	}
+
+	/*
+	 * Now we guarantee that all objects are flushed.
+	 */
+	for_each_possible_cpu(cpu) {
+		krcp = per_cpu_ptr(&krc, cpu);
+
+		/*
+		 * A monitor work can drain ready to reclaim objects
+		 * directly. Wait its completion if running or pending.
+		 */
+		cancel_delayed_work_sync(&krcp->monitor_work);
+
+		for (i = 0; i < KFREE_N_BATCHES; i++) {
+			krwp = &(krcp->krw_arr[i]);
+			flush_rcu_work(&krwp->rcu_work);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
+
 static unsigned long
 kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
base-commit: 17365d66f1c6aa6bf4f4cb9842f5edeac027bcfb
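[Editor's note: as a usage illustration, not part of the patch, a module that frees its objects via kvfree_rcu() can call the new barrier on its exit path before destroying the cache those objects came from. Below is a minimal kernel-style C sketch; "struct foo", "foo_cache", foo_free() and foo_exit() are hypothetical names invented for this example, while kvfree_rcu(), kvfree_rcu_barrier() and kmem_cache_destroy() are the real APIs involved:

/* Hypothetical module code -- illustrates the intended call ordering. */
struct foo {
	struct rcu_head rcu;
	long payload;
};

static struct kmem_cache *foo_cache;

static void foo_free(struct foo *f)
{
	/* Double-argument form: f is queued and freed after a GP. */
	kvfree_rcu(f, rcu);
}

static void __exit foo_exit(void)
{
	/*
	 * Wait until every pointer handed to kvfree_rcu() has really
	 * been freed; only then is it safe to destroy the cache the
	 * objects were allocated from.
	 */
	kvfree_rcu_barrier();
	kmem_cache_destroy(foo_cache);
}

The Tiny RCU stub can map kvfree_rcu_barrier() to plain rcu_barrier() because Tiny RCU has no per-CPU kfree batching to drain; the Tree RCU implementation above must instead queue and flush the per-CPU batches itself, since a grace period may already have elapsed while objects still sit in the caches.]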
From: Florian Westphal <fw@strlen.de>

From: Florian Westphal <fw@strlen.de>
commit dc783ba4b9df3fb3e76e968b2cbeb9960069263c upstream.
Ben Greear reports the following splat:

------------[ cut here ]------------
net/netfilter/nf_nat_core.c:1114 module nf_nat func:nf_nat_register_fn has 256 allocated at module unload
WARNING: CPU: 1 PID: 10421 at lib/alloc_tag.c:168 alloc_tag_module_unload+0x22b/0x3f0
Modules linked in: nf_nat(-) btrfs ufs qnx4 hfsplus hfs minix vfat msdos fat ...
Hardware name: Default string Default string/SKYBAY, BIOS 5.12 08/04/2020
RIP: 0010:alloc_tag_module_unload+0x22b/0x3f0
 codetag_unload_module+0x19b/0x2a0
 ? codetag_load_module+0x80/0x80
The nf_nat module exit calls kfree_rcu on those addresses, but the free operation is likely still pending by the time alloc_tag checks for leaks.

Waiting for outstanding kfree_rcu operations to complete before checking resolves this warning.
Reproducer:
unshare -n
iptables-nft -t nat -A PREROUTING -p tcp
grep nf_nat /proc/allocinfo # will list 4 allocations
rmmod nft_chain_nat
rmmod nf_nat # will WARN.
Link: https://lkml.kernel.org/r/20241007205236.11847-1-fw@strlen.de
Fixes: a473573964e5 ("lib: code tagging module support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reported-by: Ben Greear <greearb@candelatech.com>
Closes: https://lore.kernel.org/netdev/bdaaef9d-4364-4171-b82b-bcfc12e207eb@candelat...
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/codetag.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/lib/codetag.c b/lib/codetag.c
index afa8a2d4f317..d1fbbb7c2ec3 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -228,6 +228,9 @@ bool codetag_unload_module(struct module *mod)
 	if (!mod)
 		return true;
 
+	/* await any module's kfree_rcu() operations to complete */
+	kvfree_rcu_barrier();
+
 	mutex_lock(&codetag_lock);
 	list_for_each_entry(cttype, &codetag_types, link) {
 		struct codetag_module *found = NULL;
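[Editor's note: to spell out the ordering the hunk above establishes, here is a condensed, illustrative view of the fixed unload path. It is not the literal function body, which additionally walks codetag_types under codetag_lock:

bool codetag_unload_module(struct module *mod)
{
	if (!mod)
		return true;

	/*
	 * Objects the module queued via kfree_rcu()/kvfree_rcu() may
	 * still be in flight; until they are actually freed, their
	 * alloc_tag counters make the module look like it leaked.
	 * Drain them before running the leak check.
	 */
	kvfree_rcu_barrier();

	/* ... per-module allocation-tag leak check runs here ... */
	return true;
}

This mirrors the reproducer: without the barrier, rmmod nf_nat reaches the leak check while the kfree_rcu() frees from nf_nat's exit handler are still queued, so alloc_tag_module_unload() warns about 256 "leaked" bytes.]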
On Mon, Oct 21, 2024 at 10:10:03AM -0700, Suren Baghdasaryan wrote:
> From: Florian Westphal <fw@strlen.de>
>
> From: Florian Westphal <fw@strlen.de>

Any reason for the duplicated "From:" lines?

thanks,

greg k-h
On Mon, Oct 21, 2024 at 10:45 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> On Mon, Oct 21, 2024 at 10:10:03AM -0700, Suren Baghdasaryan wrote:
> > From: Florian Westphal <fw@strlen.de>
> >
> > From: Florian Westphal <fw@strlen.de>
>
> Any reason for the duplicated "From:" lines?

No, just me incorrectly formatting the patches. Please feel free to remove the duplicates. Thanks!

> thanks,
>
> greg k-h
On Mon, Oct 21, 2024 at 10:10:02AM -0700, Suren Baghdasaryan wrote:
> From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
>
> From: Uladzislau Rezki <urezki@gmail.com>
>
> commit 3c5d61ae919cc377c71118ccc76fa6e8518023f8 upstream.
>
> Add a kvfree_rcu_barrier() function. It waits until all in-flight
> pointers are freed over the RCU machinery. It does not wait for any
> GP to complete and is within its rights to return immediately if
> there are no outstanding pointers.
>
> This function is useful when there is a need to guarantee that memory
> is fully freed before destroying memory caches, for example, when
> unloading a kernel module.
>
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>
>  include/linux/rcutiny.h |   5 ++
>  include/linux/rcutree.h |   1 +
>  kernel/rcu/tree.c       | 109 +++++++++++++++++++++++++++++++++++++---
>  3 files changed, 107 insertions(+), 8 deletions(-)

We need a signed-off-by line from you, as you did the backport here. Please fix that up and resend this series.

thanks,

greg k-h
On Tue, Nov 5, 2024 at 11:09 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> On Mon, Oct 21, 2024 at 10:10:02AM -0700, Suren Baghdasaryan wrote:
> > From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
> >
> > From: Uladzislau Rezki <urezki@gmail.com>
> >
> > commit 3c5d61ae919cc377c71118ccc76fa6e8518023f8 upstream.
> >
> > Add a kvfree_rcu_barrier() function. It waits until all in-flight
> > pointers are freed over the RCU machinery. It does not wait for any
> > GP to complete and is within its rights to return immediately if
> > there are no outstanding pointers.
> >
> > This function is useful when there is a need to guarantee that memory
> > is fully freed before destroying memory caches, for example, when
> > unloading a kernel module.
> >
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> >
> >  include/linux/rcutiny.h |   5 ++
> >  include/linux/rcutree.h |   1 +
> >  kernel/rcu/tree.c       | 109 +++++++++++++++++++++++++++++++++++++---
> >  3 files changed, 107 insertions(+), 8 deletions(-)
>
> We need a signed-off-by line from you, as you did the backport here.
> Please fix that up and resend this series.

Doh! Ok, I'll resend it tomorrow morning. Thanks!

> thanks,
>
> greg k-h
On Wed, Nov 6, 2024 at 12:20 AM Suren Baghdasaryan <surenb@google.com> wrote:
> On Tue, Nov 5, 2024 at 11:09 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> > On Mon, Oct 21, 2024 at 10:10:02AM -0700, Suren Baghdasaryan wrote:
> > > From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
> > >
> > > From: Uladzislau Rezki <urezki@gmail.com>
> > >
> > > commit 3c5d61ae919cc377c71118ccc76fa6e8518023f8 upstream.
> > >
> > > Add a kvfree_rcu_barrier() function. It waits until all in-flight
> > > pointers are freed over the RCU machinery. It does not wait for any
> > > GP to complete and is within its rights to return immediately if
> > > there are no outstanding pointers.
> > >
> > > This function is useful when there is a need to guarantee that memory
> > > is fully freed before destroying memory caches, for example, when
> > > unloading a kernel module.
> > >
> > > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > > Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> > >
> > >  include/linux/rcutiny.h |   5 ++
> > >  include/linux/rcutree.h |   1 +
> > >  kernel/rcu/tree.c       | 109 +++++++++++++++++++++++++++++++++++++---
> > >  3 files changed, 107 insertions(+), 8 deletions(-)
> >
> > We need a signed-off-by line from you, as you did the backport here.
> > Please fix that up and resend this series.
>
> Doh! Ok, I'll resend it tomorrow morning. Thanks!

Added SOB in the new series posted at:
https://lore.kernel.org/all/20241106170927.130996-1-surenb@google.com/

> > thanks,
> >
> > greg k-h