[PATCH AUTOSEL 6.14 061/108] bpf: Add bpf_rbtree_{root,left,right} kfunc

4 Jun 2025

From: Martin KaFai Lau martin.lau@kernel.org
[ Upstream commit 9e3e66c553f705de51707c7ddc7f35ce159a8ef1 ]
In a bpf fq implementation that is much closer to the kernel fq,
it will need to traverse the rbtree:
https://lore.kernel.org/bpf/20250418224652.105998-13-martin.lau@linux.dev/
The much simplified logic that uses the bpf_rbtree_{root,left,right}
to traverse the rbtree is like:
struct fq_flow {
    struct bpf_rb_node	fq_node;
    struct bpf_rb_node	rate_node;
    struct bpf_refcount	refcount;
    unsigned long		sk_long;
};
struct fq_flow_root {
    struct bpf_spin_lock lock;
    struct bpf_rb_root root __contains(fq_flow, fq_node);
};
struct fq_flow *fq_classify(...)
{
    struct bpf_rb_node *tofree[FQ_GC_MAX];
    struct fq_flow_root *root;
    struct fq_flow *gc_f, *f;
    struct bpf_rb_node *p;
    int i, fcnt = 0;
/* ... */
f = NULL;
    bpf_spin_lock(&root->lock);
    p = bpf_rbtree_root(&root->root);
    while (can_loop) {
    	if (!p)
    		break;
gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
    	if (gc_f->sk_long == sk_long) {
    		f = bpf_refcount_acquire(gc_f);
    		break;
    	}
/* To be removed from the rbtree */
    	if (fcnt < FQ_GC_MAX && fq_gc_candidate(gc_f, jiffies_now))
    		tofree[fcnt++] = p;
if (gc_f->sk_long > sk_long)
    		p = bpf_rbtree_left(&root->root, p);
    	else
    		p = bpf_rbtree_right(&root->root, p);
    }
/* remove from the rbtree */
    for (i = 0; i < fcnt; i++) {
    	p = tofree[i];
    	tofree[i] = bpf_rbtree_remove(&root->root, p);
    }
bpf_spin_unlock(&root->lock);
/* bpf_obj_drop the fq_flow(s) that have just been removed
     * from the rbtree.
     */
    for (i = 0; i < fcnt; i++) {
    	p = tofree[i];
    	if (p) {
    		gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
    		bpf_obj_drop(gc_f);
    	}
    }
return f;
}
The above simplified code needs to traverse the rbtree for two purposes,
1) find the flow with the desired sk_long value
2) while searching for the sk_long, collect flows that are
   the fq_gc_candidate. They will be removed from the rbtree.
This patch adds the bpf_rbtree_{root,left,right} kfunc to enable
the rbtree traversal. The returned bpf_rb_node pointer will be a
non-owning reference which is the same as the returned pointer
of the exisiting bpf_rbtree_first kfunc.
Acked-by: Kumar Kartikeya Dwivedi memxor@gmail.com
Signed-off-by: Martin KaFai Lau martin.lau@kernel.org
Link: https://lore.kernel.org/r/20250506015857.817950-4-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov ast@kernel.org
Signed-off-by: Sasha Levin sashal@kernel.org
---
NO This commit should not be backported to stable kernel trees. Here's
my extensive analysis: ## Primary Reason: New Feature Addition This
commit adds three new kfunc functions (`bpf_rbtree_root`,
`bpf_rbtree_left`, `bpf_rbtree_right`) to the BPF rbtree API. These are
entirely new capabilities that enable rbtree traversal functionality
that did not exist before. ## Specific Code Analysis ### 1. New Function
Implementations ```c __bpf_kfunc struct bpf_rb_node
*bpf_rbtree_root(struct bpf_rb_root *root) { struct rb_root_cached *r =
(struct rb_root_cached *)root; return (struct bpf_rb_node
*)r->rb_root.rb_node; } __bpf_kfunc struct bpf_rb_node
*bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node) {
struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern
*)node; if (READ_ONCE(node_internal->owner) != root) return NULL; return
(struct bpf_rb_node *)node_internal->rb_node.rb_left; } __bpf_kfunc
struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct
bpf_rb_node *node) { struct bpf_rb_node_kern *node_internal = (struct
bpf_rb_node_kern *)node; if (READ_ONCE(node_internal->owner) != root)
return NULL; return (struct bpf_rb_node
*)node_internal->rb_node.rb_right; } ``` These are completely new
functions that extend the BPF API surface, which is characteristic of
feature additions rather than bug fixes. ### 2. Verifier Infrastructure
Expansion The commit adds these new functions to multiple verifier
tables: ```c enum special_kfunc_type { // ... existing entries ...
KF_bpf_rbtree_root, KF_bpf_rbtree_left, KF_bpf_rbtree_right, // ... }
BTF_SET_START(special_kfunc_set) // ... existing entries ...
BTF_ID(func, bpf_rbtree_root) BTF_ID(func, bpf_rbtree_left) BTF_ID(func,
bpf_rbtree_right) BTF_SET_END(special_kfunc_set) ``` This systematic
addition to verifier infrastructure demonstrates this is an API
expansion, not a fix. ### 3. Enhanced Function Classification Logic ```c
static bool is_bpf_rbtree_api_kfunc(u32 btf_id) { return btf_id ==
special_kfunc_list[KF_bpf_rbtree_add_impl] || btf_id ==
special_kfunc_list[KF_bpf_rbtree_remove] || btf_id ==
special_kfunc_list[KF_bpf_rbtree_first] || + btf_id ==
special_kfunc_list[KF_bpf_rbtree_root] || + btf_id ==
special_kfunc_list[KF_bpf_rbtree_left] || + btf_id ==
special_kfunc_list[KF_bpf_rbtree_right]; } ``` The functions are being
added to existing classification systems, expanding the API scope. ###
4. New Argument Validation Logic ```c static bool
check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, enum
btf_field_type node_field_type, u32 kfunc_btf_id) { // ... existing
logic ... case BPF_RB_NODE: ret = (kfunc_btf_id ==
special_kfunc_list[KF_bpf_rbtree_remove] || kfunc_btf_id ==
special_kfunc_list[KF_bpf_rbtree_add_impl] || + kfunc_btf_id ==
special_kfunc_list[KF_bpf_rbtree_left] || + kfunc_btf_id ==
special_kfunc_list[KF_bpf_rbtree_right]); break; } ``` This adds new
argument validation paths for the new functions. ## Comparison with
Similar Commits Looking at the historical examples: - **Similar Commit
#1 (YES)**: Added basic rbtree kfuncs - this was part of the
foundational rbtree infrastructure - **Similar Commit #2 (YES)**: Added
argument support for rbtree types - essential for the basic
functionality - **Similar Commit #3 (NO)**: Added function declarations
to test headers - clearly test infrastructure - **Similar Commit #4
(NO)**: Added special verifier handling - complex new feature logic -
**Similar Commit #5 (YES)**: Added basic BTF support for rbtree types -
foundational infrastructure ## Use Case Analysis The commit message
describes a complex use case for implementing a Fair Queuing (FQ)
algorithm that requires traversal capabilities. This is clearly an
advanced feature for specialized networking applications, not a bug fix
for existing functionality. ## Risk Assessment Adding new kfuncs carries
several risks: 1. **API Stability**: New functions become part of the
stable ABI 2. **Complexity**: Introduces new code paths in verifier
logic 3. **Testing**: New functionality may not have complete test
coverage in stable kernels 4. **Dependencies**: May rely on other recent
changes not present in stable trees ## Conclusion This commit represents
a clear feature addition that extends the BPF rbtree API with new
traversal capabilities. It does not fix any existing bugs or address
critical issues. The functionality is designed for advanced use cases
and represents an expansion of the BPF programming model rather than
maintenance of existing capabilities. Following stable tree guidelines,
this should remain in mainline development kernels and not be backported
to stable releases.
kernel/bpf/helpers.c  | 30 ++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 22 ++++++++++++++++++----
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5e5e945a86b9b..ca3d866e9a2de 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2354,6 +2354,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
    return (struct bpf_rb_node *)rb_first_cached(r);
 }
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
+{
+	struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+	return (struct bpf_rb_node *)r->rb_root.rb_node;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+	if (READ_ONCE(node_internal->owner) != root)
+		return NULL;
+
+	return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+	if (READ_ONCE(node_internal->owner) != root)
+		return NULL;
+
+	return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
+}
+
 /**
  * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
  * kfunc which is not stored in a map as a kptr, must be released by calling
@@ -3103,6 +3130,9 @@ BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
 BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
 BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1841467c4f2e5..5c24f36ce36b2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11778,6 +11778,9 @@ enum special_kfunc_type {
    KF_bpf_rbtree_remove,
    KF_bpf_rbtree_add_impl,
    KF_bpf_rbtree_first,
+	KF_bpf_rbtree_root,
+	KF_bpf_rbtree_left,
+	KF_bpf_rbtree_right,
    KF_bpf_dynptr_from_skb,
    KF_bpf_dynptr_from_xdp,
    KF_bpf_dynptr_slice,
@@ -11812,6 +11815,9 @@ BTF_ID(func, bpf_rdonly_cast)
 BTF_ID(func, bpf_rbtree_remove)
 BTF_ID(func, bpf_rbtree_add_impl)
 BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
 #ifdef CONFIG_NET
 BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
@@ -11843,6 +11849,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
 BTF_ID(func, bpf_rbtree_remove)
 BTF_ID(func, bpf_rbtree_add_impl)
 BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
 #ifdef CONFIG_NET
 BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12258,7 +12267,10 @@ static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
 {
    return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
           btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
+	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
+	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
 }
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12349,7 +12361,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
    	break;
    case BPF_RB_NODE:
    	ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
    	break;
    default:
    	verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -12864,11 +12878,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
    	case KF_ARG_PTR_TO_RB_NODE:
    		if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
    			if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
-					verbose(env, "rbtree_remove node input must be non-owning ref\n");
+					verbose(env, "%s node input must be non-owning ref\n", func_name);
    				return -EINVAL;
    			}
    			if (in_rbtree_lock_required_cb(env)) {
-					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+					verbose(env, "%s not allowed in rbtree cb\n", func_name);
    				return -EINVAL;
    			}
    		} else {
-- 
2.39.5



    

2025

2024

2023

2022

2021

2020

2019

2018

2017

[PATCH AUTOSEL 6.14 061/108] bpf: Add bpf_rbtree_{root,left,right} kfunc