This small patchset avoids an RCU context warning when unpinning an htab whose values contain internal structs (timer, workqueue, or task_work).
v2: - rename bpf_free_inode() to bpf_destroy_inode() (Andrii)
v1: https://lore.kernel.org/all/20251003084528.502518-1-kafai.wan@linux.dev/
--- KaFai Wan (2): bpf: Avoid RCU context warning when unpinning htab with internal structs selftests/bpf: Add test for unpinning htab with internal timer struct
kernel/bpf/inode.c | 4 +- .../selftests/bpf/prog_tests/pinning_htab.c | 37 +++++++++++++++++++ .../selftests/bpf/progs/test_pinning_htab.c | 25 +++++++++++++ 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/pinning_htab.c create mode 100644 tools/testing/selftests/bpf/progs/test_pinning_htab.c
When unpinning a BPF hash table (htab or htab_lru) that contains internal structures (timer, workqueue, or task_work) in its values, a BUG warning is triggered: BUG: sleeping function called from invalid context at kernel/bpf/hashtab.c:244 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 14, name: ksoftirqd/0 ...
The issue arises from the interaction between BPF object unpinning and RCU callback mechanisms: 1. BPF object unpinning uses ->free_inode() which schedules cleanup via call_rcu(), deferring the actual freeing to an RCU callback that executes within the RCU_SOFTIRQ context. 2. During cleanup of hash tables containing internal structures, htab_map_free_internal_structs() is invoked, which includes cond_resched() or cond_resched_rcu() calls to yield the CPU during potentially long operations.
However, cond_resched() or cond_resched_rcu() cannot be safely called from atomic RCU softirq context, leading to the BUG warning when attempting to reschedule.
Fix this by switching from ->free_inode() to ->destroy_inode() and renaming bpf_free_inode() to bpf_destroy_inode() for BPF objects (prog, map, link). This allows direct inode freeing without RCU callback scheduling, avoiding the invalid context warning.
Reported-by: Le Chen tom2cat@sjtu.edu.cn Closes: https://lore.kernel.org/all/1444123482.1827743.1750996347470.JavaMail.zimbra... Fixes: 68134668c17f ("bpf: Add map side support for bpf timers.") Suggested-by: Alexei Starovoitov ast@kernel.org Signed-off-by: KaFai Wan kafai.wan@linux.dev --- kernel/bpf/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index f90bdcc0a047..81780bcf8d25 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -775,7 +775,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; }
-static void bpf_free_inode(struct inode *inode) +static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type;
@@ -790,7 +790,7 @@ const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = inode_just_drop, .show_options = bpf_show_options, - .free_inode = bpf_free_inode, + .destroy_inode = bpf_destroy_inode, };
enum {
On 10/6/25 6:22 PM, KaFai Wan wrote:
When unpinning a BPF hash table (htab or htab_lru) that contains internal structures (timer, workqueue, or task_work) in its values, a BUG warning is triggered: BUG: sleeping function called from invalid context at kernel/bpf/hashtab.c:244 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 14, name: ksoftirqd/0 ...
The issue arises from the interaction between BPF object unpinning and RCU callback mechanisms:
- BPF object unpinning uses ->free_inode() which schedules cleanup via call_rcu(), deferring the actual freeing to an RCU callback that executes within the RCU_SOFTIRQ context.
- During cleanup of hash tables containing internal structures, htab_map_free_internal_structs() is invoked, which includes cond_resched() or cond_resched_rcu() calls to yield the CPU during potentially long operations.
However, cond_resched() or cond_resched_rcu() cannot be safely called from atomic RCU softirq context, leading to the BUG warning when attempting to reschedule.
Fix this by switching from ->free_inode() to ->destroy_inode() and renaming bpf_free_inode() to bpf_destroy_inode() for BPF objects (prog, map, link). This allows direct inode freeing without RCU callback scheduling, avoiding the invalid context warning.
Reported-by: Le Chen tom2cat@sjtu.edu.cn Closes: https://lore.kernel.org/all/1444123482.1827743.1750996347470.JavaMail.zimbra... Fixes: 68134668c17f ("bpf: Add map side support for bpf timers.") Suggested-by: Alexei Starovoitov ast@kernel.org Signed-off-by: KaFai Wan kafai.wan@linux.dev
Acked-by: Yonghong Song yonghong.song@linux.dev
Add test to verify that unpinning hash tables containing internal timer structures does not trigger context warnings.
Each subtest (timer_prealloc and timer_no_prealloc) can trigger the context warning on its own when unpinning, but the warning cannot be triggered twice within a short time interval (HZ jiffies, i.e. one second), which is expected behavior.
Signed-off-by: KaFai Wan kafai.wan@linux.dev --- .../selftests/bpf/prog_tests/pinning_htab.c | 37 +++++++++++++++++++ .../selftests/bpf/progs/test_pinning_htab.c | 25 +++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/pinning_htab.c create mode 100644 tools/testing/selftests/bpf/progs/test_pinning_htab.c
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c new file mode 100644 index 000000000000..fc804bb87b26 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_pinning_htab.skel.h" + +static void unpin_map(const char *map_name, const char *pin_path) +{ + struct test_pinning_htab *skel; + struct bpf_map *map; + int err; + + skel = test_pinning_htab__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name")) + goto out; + + err = bpf_map__pin(map, pin_path); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = bpf_map__unpin(map, pin_path); + if (!ASSERT_OK(err, "bpf_map__unpin")) + goto out; +out: + test_pinning_htab__destroy(skel); +} + +void test_pinning_htab(void) +{ + if (test__start_subtest("timer_prealloc")) + unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc"); + if (test__start_subtest("timer_no_prealloc")) + unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc"); +} diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c new file mode 100644 index 000000000000..ae227930c73c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct timer_val { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, struct timer_val); + __uint(max_entries, 1); +} timer_prealloc SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, struct timer_val); + __uint(max_entries, 1); + 
__uint(map_flags, BPF_F_NO_PREALLOC); +} timer_no_prealloc SEC(".maps");
On 10/6/25 6:22 PM, KaFai Wan wrote:
Add test to verify that unpinning hash tables containing internal timer structures does not trigger context warnings.
Each subtest (timer_prealloc and timer_no_prealloc) can trigger the context warning on its own when unpinning, but the warning cannot be triggered twice within a short time interval (HZ jiffies, i.e. one second), which is expected behavior.
Signed-off-by: KaFai Wan kafai.wan@linux.dev
Ack with a nit below.
Acked-by: Yonghong Song yonghong.song@linux.dev
.../selftests/bpf/prog_tests/pinning_htab.c | 37 +++++++++++++++++++ .../selftests/bpf/progs/test_pinning_htab.c | 25 +++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/pinning_htab.c create mode 100644 tools/testing/selftests/bpf/progs/test_pinning_htab.c
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c new file mode 100644 index 000000000000..fc804bb87b26 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h> +#include "test_pinning_htab.skel.h"
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+	struct test_pinning_htab *skel;
+	struct bpf_map *map;
+	int err;
+
+	skel = test_pinning_htab__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+		return;
+
+	map = bpf_object__find_map_by_name(skel->obj, map_name);
+	if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+		goto out;
+
+	err = bpf_map__pin(map, pin_path);
+	if (!ASSERT_OK(err, "bpf_map__pin"))
+		goto out;
+
+	err = bpf_map__unpin(map, pin_path);
+	if (!ASSERT_OK(err, "bpf_map__unpin"))
+		goto out;
No need to have 'goto out' here, since the 'out' label immediately follows. Just do ASSERT_OK(err, "bpf_map__unpin");
+out:
+	test_pinning_htab__destroy(skel);
+}
+void test_pinning_htab(void)
+{
+	if (test__start_subtest("timer_prealloc"))
+		unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+	if (test__start_subtest("timer_no_prealloc"))
+		unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+} diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c new file mode 100644 index 000000000000..ae227930c73c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h" +#include <bpf/bpf_helpers.h>
+char _license[] SEC("license") = "GPL";
+struct timer_val {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, struct timer_val);
+	__uint(max_entries, 1);
+} timer_prealloc SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, struct timer_val);
+	__uint(max_entries, 1);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} timer_no_prealloc SEC(".maps");
linux-kselftest-mirror@lists.linaro.org