still mostly a WIP, but it seems to be working for the couple of tests.
Signed-off-by: Benjamin Tissoires bentiss@kernel.org
---
This is an RFC, and is not meant to be fully reviewed/applied as it is. I'm posting this to show what I wanted to explain in https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa... --- kernel/bpf/verifier.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 206 insertions(+), 17 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2b1e24c440c5..cc4dab81b306 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -336,6 +336,16 @@ struct bpf_kfunc_call_arg_meta { struct bpf_map *ptr; int uid; } map; + struct { + bool enabled; + bool sleepable; + u32 nr_args; + struct { + // FIXME: should be enum kfunc_ptr_arg_type type; + int type; + u32 btf_id; + } args[5]; + } async_cb; u64 mem_size; };
@@ -9538,7 +9548,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins */ env->subprog_info[subprog].is_cb = true; if (bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_kfunc(insn->imm)) { + !is_callback_calling_kfunc(insn->imm) && + !(kfunc_meta && kfunc_meta->async_cb.enabled)) { verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", func_id_name(insn->imm), insn->imm); return -EFAULT; @@ -9549,14 +9560,15 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return -EFAULT; }
- if (is_async_callback_calling_insn(insn)) { + if (is_async_callback_calling_insn(insn) || (kfunc_meta && kfunc_meta->async_cb.enabled)) { struct bpf_verifier_state *async_cb;
/* there is no real recursion here. timer and workqueue callbacks are async */ env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, insn_idx, subprog, - is_bpf_wq_set_callback_impl_kfunc(insn->imm)); + (is_bpf_wq_set_callback_impl_kfunc(insn->imm) || + (kfunc_meta && kfunc_meta->async_cb.sleepable))); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; @@ -10890,6 +10902,16 @@ static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param return btf_param_match_suffix(btf, arg, "__str"); }
+static bool is_kfunc_arg_async_cb(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__async"); +} + +static bool is_kfunc_arg_sleepable_async_cb(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__s_async"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -11045,6 +11067,48 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_WORKQUEUE, };
+static const char *__kfunc_ptr_arg_type_str(enum kfunc_ptr_arg_type value) +{ + switch (value) { + case KF_ARG_PTR_TO_CTX: + return "KF_ARG_PTR_TO_CTX"; + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + return "KF_ARG_PTR_TO_ALLOC_BTF_ID"; + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + return "KF_ARG_PTR_TO_REFCOUNTED_KPTR"; + case KF_ARG_PTR_TO_DYNPTR: + return "KF_ARG_PTR_TO_DYNPTR"; + case KF_ARG_PTR_TO_ITER: + return "KF_ARG_PTR_TO_ITER"; + case KF_ARG_PTR_TO_LIST_HEAD: + return "KF_ARG_PTR_TO_LIST_HEAD"; + case KF_ARG_PTR_TO_LIST_NODE: + return "KF_ARG_PTR_TO_LIST_NODE"; + case KF_ARG_PTR_TO_BTF_ID: + return "KF_ARG_PTR_TO_BTF_ID"; + case KF_ARG_PTR_TO_MEM: + return "KF_ARG_PTR_TO_MEM"; + case KF_ARG_PTR_TO_MEM_SIZE: + return "KF_ARG_PTR_TO_MEM_SIZE"; + case KF_ARG_PTR_TO_CALLBACK: + return "KF_ARG_PTR_TO_CALLBACK"; + case KF_ARG_PTR_TO_RB_ROOT: + return "KF_ARG_PTR_TO_RB_ROOT"; + case KF_ARG_PTR_TO_RB_NODE: + return "KF_ARG_PTR_TO_RB_NODE"; + case KF_ARG_PTR_TO_NULL: + return "KF_ARG_PTR_TO_NULL"; + case KF_ARG_PTR_TO_CONST_STR: + return "KF_ARG_PTR_TO_CONST_STR"; + case KF_ARG_PTR_TO_MAP: + return "KF_ARG_PTR_TO_MAP"; + case KF_ARG_PTR_TO_WORKQUEUE: + return "KF_ARG_PTR_TO_WORKQUEUE"; + } + + return "UNKNOWN"; +} + enum special_kfunc_type { KF_bpf_obj_new_impl, KF_bpf_obj_drop_impl, @@ -12151,6 +12215,39 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } meta->subprogno = reg->subprogno; + meta->async_cb.sleepable = is_kfunc_arg_sleepable_async_cb(meta->btf, &args[i]); + meta->async_cb.enabled = meta->async_cb.sleepable || + is_kfunc_arg_async_cb(meta->btf, &args[i]); + if (meta->async_cb.enabled) { + const struct btf_type *cb_proto; + const struct btf_param *cb_args; + u32 cb_type = args[i].type; + int i; + + cb_proto = btf_type_resolve_func_ptr(btf, cb_type, NULL); + if (cb_proto) { + meta->async_cb.nr_args = btf_type_vlen(cb_proto); + cb_args = btf_params(cb_proto); + for (i = 0; i < meta->async_cb.nr_args; i++) { + const struct btf_type *t, *ref_t; + const char *ref_tname; + u32 ref_id, t_id; + + t = btf_type_skip_modifiers(btf, cb_args[i].type, &t_id); + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + meta->async_cb.args[i].type = get_kfunc_ptr_arg_type(env, meta, + t, ref_t, ref_tname, cb_args, i, meta->async_cb.nr_args); + + /* FIXME: we should not get an error from get_kfunc_ptr_arg_type() */ + if (meta->async_cb.args[i].type < 0) + meta->async_cb.args[i].type = KF_ARG_PTR_TO_BTF_ID; + meta->async_cb.args[i].btf_id = ref_id; + } + } else { + meta->async_cb.nr_args = 0; + } + } break; case KF_ARG_PTR_TO_REFCOUNTED_KPTR: if (!type_is_ptr_alloc_obj(reg->type)) { @@ -12248,6 +12345,71 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
+static int set_generic_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + int i; + + for (i = 0; i < 5; i++) { + if (i < meta->async_cb.nr_args) { + u32 type = meta->async_cb.args[i].type; + + switch (type) { + case KF_ARG_PTR_TO_CTX: + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_ITER: + case KF_ARG_PTR_TO_LIST_HEAD: + case KF_ARG_PTR_TO_LIST_NODE: + case KF_ARG_PTR_TO_CALLBACK: + case KF_ARG_PTR_TO_RB_ROOT: + case KF_ARG_PTR_TO_RB_NODE: + case KF_ARG_PTR_TO_NULL: + case KF_ARG_PTR_TO_CONST_STR: + verbose(env, "argument #%d of type %s is not supported in async callbacks\n", + i, __kfunc_ptr_arg_type_str(meta->async_cb.args[i].type)); + return -EINVAL; + case KF_ARG_PTR_TO_MEM: + case KF_ARG_PTR_TO_MEM_SIZE: + callee->regs[BPF_REG_1 + i].type = PTR_TO_MEM; + __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]); + callee->regs[BPF_REG_1 + i].mem_size = 8; // FIXME: should store the size while walking the arguments + break; + case KF_ARG_PTR_TO_MAP: + callee->regs[BPF_REG_1 + i].type = CONST_PTR_TO_MAP; + __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]); + callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr; + break; + case KF_ARG_PTR_TO_WORKQUEUE: + callee->regs[BPF_REG_1 + i].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]); + callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr; + break; + case KF_ARG_PTR_TO_BTF_ID: + callee->regs[BPF_REG_1 + i].type = PTR_TO_BTF_ID; + __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]); + callee->regs[BPF_REG_1 + i].btf = meta->btf; + callee->regs[BPF_REG_1 + i].btf_id = meta->async_cb.args[i].btf_id; + break; + default: + verbose(env, "verifier bug: unexpected arg#%d type (%d) in async callback\n", + i, type); + return -EFAULT; + } + } else { + __mark_reg_not_init(env, &callee->regs[BPF_REG_1 + i]); + } + } + callee->in_callback_fn = true; + // callee->callback_ret_range = retval_range(-MAX_ERRNO, ); + return 0; +} + + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -12313,6 +12475,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } }
+ if (meta.async_cb.enabled) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_generic_callback_state, &meta); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
@@ -15918,22 +16090,39 @@ static int visit_insn(int t, struct bpf_verifier_env *env) } if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; + const struct btf_param *args; + u32 i, nargs;
ret = fetch_kfunc_meta(env, insn, &meta, NULL); - if (ret == 0 && is_iter_next_kfunc(&meta)) { - mark_prune_point(env, t); - /* Checking and saving state checkpoints at iter_next() call - * is crucial for fast convergence of open-coded iterator loop - * logic, so we need to force it. If we don't do that, - * is_state_visited() might skip saving a checkpoint, causing - * unnecessarily long sequence of not checkpointed - * instructions and jumps, leading to exhaustion of jump - * history buffer, and potentially other undesired outcomes. - * It is expected that with correct open-coded iterators - * convergence will happen quickly, so we don't run a risk of - * exhausting memory. - */ - mark_force_checkpoint(env, t); + if (ret == 0) { + args = (const struct btf_param *)(meta.func_proto + 1); + nargs = btf_type_vlen(meta.func_proto); + + for (i = 0; i < nargs; i++) { + if (is_kfunc_arg_sleepable_async_cb(meta.btf, &args[i]) || + is_kfunc_arg_async_cb(meta.btf, &args[i])) + /* Mark this call insn as a prune point to trigger + * is_state_visited() check before call itself is + * processed by __check_func_call(). Otherwise new + * async state will be pushed for further exploration. + */ + mark_prune_point(env, t); + } + if (is_iter_next_kfunc(&meta)) { + mark_prune_point(env, t); + /* Checking and saving state checkpoints at iter_next() call + * is crucial for fast convergence of open-coded iterator loop + * logic, so we need to force it. If we don't do that, + * is_state_visited() might skip saving a checkpoint, causing + * unnecessarily long sequence of not checkpointed + * instructions and jumps, leading to exhaustion of jump + * history buffer, and potentially other undesired outcomes. + * It is expected that with correct open-coded iterators + * convergence will happen quickly, so we don't run a risk of + * exhausting memory. + */ + mark_force_checkpoint(env, t); + } } } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);