On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@gmail.com> wrote:
> From: Eduard Zingerman <eddyz87@gmail.com>
>
> Changes for scalar ID tracking of spilled unbound scalars lead to a
> certain verification performance regression. This commit mitigates the
> regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   an unbounded scalar register;
> - spi with all slots marked STACK_ZERO is read as a scalar register
>   with value zero.
>
> This commit modifies stacksafe() to consider the situations above
> equivalent.
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States     (DIFF)
> ----------------  --------  ----------  ----------  -----------------
> pyperf180.bpf.o   on_event       10456        8422    -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519   -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703    -8732 (-64.99%)
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 83 insertions(+)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>  	*stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +	return stack->slot_type[0] == STACK_SPILL &&
> +	       stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>  	if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>  	}
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO)
> +			return false;
> +	return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +				    struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO &&
> +		    stack->slot_type[i] != STACK_MISC &&
> +		    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +			return false;
> +	return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +	return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +	struct bpf_reg_state unbound_reg = {};
> +	struct bpf_reg_state zero_reg = {};
>  	int i, spi;
>
> +	__mark_reg_unknown(env, &unbound_reg);
> +	__mark_reg_const_zero(env, &zero_reg);
> +	zero_reg.precise = true;
> +
These are immutable, right? Would it make sense to set them up just once as static variables instead of initializing them on each check?
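For illustration, one shape the one-time setup could take (just a sketch, not a tested change: __mark_reg_unknown() consults env, e.g. when deciding the precise flag, so it would need confirming that values cached from the first call are valid for every later env, and concurrent verifier instances would need a look too; regs_inited is a made-up name):

static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
	/* set up once on the first call instead of on every call */
	static struct bpf_reg_state unbound_reg;
	static struct bpf_reg_state zero_reg;
	static bool regs_inited;
	int i, spi;

	if (!regs_inited) {
		__mark_reg_unknown(env, &unbound_reg);
		__mark_reg_const_zero(env, &zero_reg);
		zero_reg.precise = true;
		regs_inited = true;
	}
	...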
>  	/* walk slots of the explored stack and ignore any additional
>  	 * slots in the current stack, since explored(safe) state
>  	 * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  			continue;
>  		}
>
We haven't checked that cur->stack[spi] is OK to access yet; that's done a bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken. So these checks would probably need to be moved a bit lower.
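To illustrate the placement (a sketch only, with the rest of the loop body elided):

	for (i = 0; i < old->allocated_stack; i++) {
		spi = i / BPF_REG_SIZE;

		/* ... existing per-slot checks ... */

		if (i >= cur->allocated_stack)
			return false;

		/* only from here on is cur->stack[spi] known to be in bounds */
		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
			i += BPF_REG_SIZE - 1;
			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
				     idmap, exact))
				return false;
			continue;
		}

		/* ... likewise for the other three new cases ... */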
> +		/* load of stack value with all MISC and ZERO slots produces unbounded
> +		 * scalar value, call regsafe to ensure scalar ids are compared.
> +		 */
> +		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
> +
> +		if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +		    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &unbound_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
> +
Could the two cases above be collapsed into something like:

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->stack[spi]))
	scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
		     &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->stack[spi]))
	scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
		     &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}
where is_spilled_unbound64() would be basically `return is_spilled_unbound_scalar_reg64(&old->..) || is_stack_unbound_slot64(&old->...)`;
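Spelled out as a helper (a sketch; note that is_stack_unbound_slot64() also takes env):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
				 struct bpf_stack_state *stack)
{
	return is_spilled_unbound_scalar_reg64(stack) ||
	       is_stack_unbound_slot64(env, stack);
}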
Similarly for the zero case? Though I'm wondering if the zero case should be checked first, as it's actually a subset of is_spilled_unbound64() when it comes to STACK_ZERO/STACK_MISC mixes, no?
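For the zero case the same shape might look like this (an illustrative sketch; is_spilled_scalar_or_zero64() is a hypothetical helper in the spirit of is_spilled_unbound64(), roughly `return is_spilled_scalar_reg64(stack) || is_stack_zero64(stack);`):

scalar_old = scalar_cur = NULL;
if (is_spilled_scalar_or_zero64(&old->stack[spi]))
	scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
		     &old->stack[spi].spilled_ptr : &zero_reg;
if (is_spilled_scalar_or_zero64(&cur->stack[spi]))
	scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
		     &cur->stack[spi].spilled_ptr : &zero_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}

Note this is a bit broader than the hunk below (it also matches spill-vs-spill of bounded scalars, which the existing spill handling already compares via regsafe()), so that overlap would need double-checking.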
> +		/* load of stack value with all ZERO slots produces scalar value 0,
> +		 * call regsafe to ensure scalar ids are compared and precision
> +		 * flags are taken into account.
> +		 */
> +		if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_zero64(&cur->stack[spi])) {
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
> +		if (is_stack_zero64(&old->stack[spi]) &&
> +		    is_spilled_scalar_reg64(&cur->stack[spi])) {
> +			if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
>  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>  			continue;
> --
> 2.43.0