On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@gmail.com> wrote:
> From: Eduard Zingerman <eddyz87@gmail.com>
>
> Changes for scalar ID tracking of spilled unbound scalars lead to a
> certain verification performance regression. This commit mitigates the
> regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   an unbounded scalar register;
> - spi with all slots marked STACK_ZERO is read as a scalar register
>   with value zero.
>
> This commit modifies stacksafe() to consider the situations above
> equivalent.
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States     (DIFF)
> ----------------  --------  ----------  ----------  -----------------
> pyperf180.bpf.o   on_event       10456        8422    -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519   -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703    -8732 (-64.99%)
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 83 insertions(+)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>  	*stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +	return stack->slot_type[0] == STACK_SPILL &&
> +	       stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>  	if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>  	}
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO)
> +			return false;
> +	return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +				    struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO &&
> +		    stack->slot_type[i] != STACK_MISC &&
> +		    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +			return false;
> +	return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +	return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +	struct bpf_reg_state unbound_reg = {};
> +	struct bpf_reg_state zero_reg = {};
>  	int i, spi;
>
> +	__mark_reg_unknown(env, &unbound_reg);
> +	__mark_reg_const_zero(env, &zero_reg);
> +	zero_reg.precise = true;
> +
These are immutable, right? Would it make sense to set them up just once as static variables instead of initializing them on each check?
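For illustration, one shape the one-time setup could take (just a sketch, not a tested change: __mark_reg_unknown() consults env, e.g. when deciding the precise flag, so it would need confirming that values cached from the first call are valid for every later env, and concurrent verifier instances would need a look too; regs_inited is a made-up name):

static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
	/* set up once on the first call instead of on every call */
	static struct bpf_reg_state unbound_reg;
	static struct bpf_reg_state zero_reg;
	static bool regs_inited;
	int i, spi;

	if (!regs_inited) {
		__mark_reg_unknown(env, &unbound_reg);
		__mark_reg_const_zero(env, &zero_reg);
		zero_reg.precise = true;
		regs_inited = true;
	}
	...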
>  	/* walk slots of the explored stack and ignore any additional
>  	 * slots in the current stack, since explored(safe) state
>  	 * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  			continue;
>  		}
>
We haven't checked that cur->stack[spi] is OK to access yet; that's done a bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken. So these checks would probably need to be moved a bit lower.
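To illustrate the placement (a sketch only, with the rest of the loop body elided):

	for (i = 0; i < old->allocated_stack; i++) {
		spi = i / BPF_REG_SIZE;

		/* ... existing per-slot checks ... */

		if (i >= cur->allocated_stack)
			return false;

		/* only from here on is cur->stack[spi] known to be in bounds */
		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
			i += BPF_REG_SIZE - 1;
			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
				     idmap, exact))
				return false;
			continue;
		}

		/* ... likewise for the other three new cases ... */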
> +		/* load of stack value with all MISC and ZERO slots produces unbounded
> +		 * scalar value, call regsafe to ensure scalar ids are compared.
> +		 */
> +		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
> +
> +		if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +		    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &unbound_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
> +
Could the two cases above be collapsed into something like:

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->stack[spi]))
	scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
		     &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->stack[spi]))
	scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
		     &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}
where is_spilled_unbound64() would be basically `return is_spilled_unbound_scalar_reg64(&old->..) || is_stack_unbound_slot64(&old->...)`;
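Spelled out as a helper (a sketch; note that is_stack_unbound_slot64() also takes env):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
				 struct bpf_stack_state *stack)
{
	return is_spilled_unbound_scalar_reg64(stack) ||
	       is_stack_unbound_slot64(env, stack);
}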
Similarly for the zero case? Though I'm wondering if the zero case should be checked first, as it's actually a subset of is_spilled_unbound64() when it comes to STACK_ZERO/STACK_MISC mixes, no?
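For the zero case the same shape might look like this (an illustrative sketch; is_spilled_scalar_or_zero64() is a hypothetical helper in the spirit of is_spilled_unbound64(), roughly `return is_spilled_scalar_reg64(stack) || is_stack_zero64(stack);`):

scalar_old = scalar_cur = NULL;
if (is_spilled_scalar_or_zero64(&old->stack[spi]))
	scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
		     &old->stack[spi].spilled_ptr : &zero_reg;
if (is_spilled_scalar_or_zero64(&cur->stack[spi]))
	scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
		     &cur->stack[spi].spilled_ptr : &zero_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}

Note this is a bit broader than the hunk below (it also matches spill-vs-spill of bounded scalars, which the existing spill handling already compares via regsafe()), so that overlap would need double-checking.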
> +		/* load of stack value with all ZERO slots produces scalar value 0,
> +		 * call regsafe to ensure scalar ids are compared and precision
> +		 * flags are taken into account.
> +		 */
> +		if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_zero64(&cur->stack[spi])) {
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
> +		if (is_stack_zero64(&old->stack[spi]) &&
> +		    is_spilled_scalar_reg64(&cur->stack[spi])) {
> +			if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
>  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>  			continue;
> --
> 2.43.0