Message-ID: <CAEf4BzYizLHHYPg0yKu-no3toMLS3wSyA2V_wtnHAyn6Burofg@mail.gmail.com>
Date: Tue, 9 Jan 2024 16:22:30 -0800
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Maxim Mikityanskiy <maxtram95@...il.com>
Cc: Eduard Zingerman <eddyz87@...il.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, Andrii Nakryiko <andrii@...nel.org>,
Shung-Hsi Yu <shung-hsi.yu@...e.com>, John Fastabend <john.fastabend@...il.com>,
Martin KaFai Lau <martin.lau@...ux.dev>, Song Liu <song@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>, KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>, Hao Luo <haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>,
Mykola Lysenko <mykolal@...com>, Shuah Khan <shuah@...nel.org>,
"David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>, bpf@...r.kernel.org, linux-kselftest@...r.kernel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH bpf-next v2 14/15] bpf: Optimize state pruning for spilled scalars
On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@...il.com> wrote:
>
> From: Eduard Zingerman <eddyz87@...il.com>
>
> The changes for scalar ID tracking of spilled unbound scalars lead to
> a certain verification performance regression. This commit mitigates
> the regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   an unbound scalar register;
> - a spi with all slots marked STACK_ZERO is read as a scalar register
>   with value zero.
>
> This commit modifies stacksafe() to consider the situations above
> equivalent.
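
As a side note for readers following along: the first property can be
illustrated with a selftest-style program along these lines. This is a
hypothetical example, not one of the selftests in this series, and it
assumes (per earlier patches in the series) that stores of zero leave
STACK_ZERO marks:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"

SEC("socket")
__naked void misc_zero_mix_reads_as_unbound(void)
{
	asm volatile (
		"call %[bpf_get_prandom_u32];"
		/* byte store of an unknown value -> STACK_MISC mark */
		"*(u8 *)(r10 - 8) = r0;"
		/* stores of zero -> STACK_ZERO marks */
		"*(u8 *)(r10 - 7) = 0;"
		"*(u16 *)(r10 - 6) = 0;"
		"*(u32 *)(r10 - 4) = 0;"
		/* the 8-byte read of this MISC/ZERO mix is tracked as
		 * an unbound scalar, same as reading back a spilled
		 * unbound scalar register
		 */
		"r1 = *(u64 *)(r10 - 8);"
		"r0 = 0;"
		"exit;"
		:
		: __imm(bpf_get_prandom_u32)
		: __clobber_all);
}

char _license[] SEC("license") = "GPL";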
>
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States    (DIFF)
> ----------------  --------  ----------  ----------  ----------------
> pyperf180.bpf.o   on_event       10456        8422   -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519  -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703   -8732 (-64.99%)
>
> Signed-off-by: Eduard Zingerman <eddyz87@...il.com>
> ---
> kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 83 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>  	*stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +	return stack->slot_type[0] == STACK_SPILL &&
> +	       stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>  	if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>  	}
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO)
> +			return false;
> +	return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +				    struct bpf_stack_state *stack)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +		if (stack->slot_type[i] != STACK_ZERO &&
> +		    stack->slot_type[i] != STACK_MISC &&
> +		    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +			return false;
> +	return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +	return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +	struct bpf_reg_state unbound_reg = {};
> +	struct bpf_reg_state zero_reg = {};
>  	int i, spi;
>
> +	__mark_reg_unknown(env, &unbound_reg);
> +	__mark_reg_const_zero(env, &zero_reg);
> +	zero_reg.precise = true;
these are immutable, right? Would it make sense to set them up just
once as static variables instead of initializing them on every
stacksafe() call?
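
Something like the following is what I have in mind. Completely
untested sketch; it assumes the env-derived state written by
__mark_reg_unknown() is identical across verification runs, which
would need double-checking (otherwise per-env caching would be needed
instead):

static struct bpf_reg_state unbound_reg;
static struct bpf_reg_state zero_reg;
static bool fake_regs_inited;

	/* in stacksafe(), replacing the per-call initialization */
	if (!fake_regs_inited) {
		__mark_reg_unknown(env, &unbound_reg);
		__mark_reg_const_zero(env, &zero_reg);
		zero_reg.precise = true;
		fake_regs_inited = true;
	}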
> +
>  	/* walk slots of the explored stack and ignore any additional
>  	 * slots in the current stack, since explored(safe) state
>  	 * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  			continue;
>  		}
>
we didn't check that cur->stack[spi] is ok to access yet; that's done
a bit later with `if (i >= cur->allocated_stack)`, if I'm not
mistaken. So these new checks would probably need to be moved a bit
lower.
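
I.e., something like this ordering (just a sketch, and modulo
re-checking how the STACK_INVALID skip on the old slot interacts with
is_stack_unbound_slot64(), which deliberately allows STACK_INVALID):

	if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
		continue;

	if (i >= cur->allocated_stack)
		return false;

	/* only below this point is cur->stack[spi] known to be
	 * allocated, so the new equivalence checks would go here
	 */
	if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
	    is_stack_unbound_slot64(env, &cur->stack[spi])) {
		/* ... as in the patch ... */
	}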
> +		/* load of stack value with all MISC and ZERO slots produces unbounded
> +		 * scalar value, call regsafe to ensure scalar ids are compared.
> +		 */
> +		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
> +
> +		if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +		    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +			i += BPF_REG_SIZE - 1;
> +			if (!regsafe(env, &unbound_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			continue;
> +		}
Could the two symmetric checks above be combined? Something like:

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->stack[spi]))
	scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
		     &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->stack[spi]))
	scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
		     &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}
where is_spilled_unbound64() would basically be
`return is_spilled_unbound_scalar_reg64(stack) ||
is_stack_unbound_slot64(env, stack);`.
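
Spelled out as actual code, the helper I have in mind would be
something like (untested):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
				 struct bpf_stack_state *stack)
{
	/* either a 64-bit spill of an unbound scalar register, or a
	 * full slot of STACK_MISC/STACK_ZERO (and, if allowed,
	 * STACK_INVALID) marks that reads back as an unbound scalar
	 */
	return is_spilled_unbound_scalar_reg64(stack) ||
	       is_stack_unbound_slot64(env, stack);
}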
Similarly for the zero case? Though I'm wondering if the zero case
should be checked first, as it's actually a subset of
is_spilled_unbound64() when it comes to STACK_ZERO/STACK_MISC mixes,
no?
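
In the same combined style, the zero case would look roughly like this
(untested sketch; note it would also swallow the spilled-vs-spilled
scalar pair, which the existing STACK_SPILL handling below covers
today, so that overlap would need double-checking):

scalar_old = scalar_cur = NULL;
if (is_spilled_scalar_reg64(&old->stack[spi]))
	scalar_old = &old->stack[spi].spilled_ptr;
else if (is_stack_zero64(&old->stack[spi]))
	scalar_old = &zero_reg;
if (is_spilled_scalar_reg64(&cur->stack[spi]))
	scalar_cur = &cur->stack[spi].spilled_ptr;
else if (is_stack_zero64(&cur->stack[spi]))
	scalar_cur = &zero_reg;
if (scalar_old && scalar_cur) {
	if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
		return false;
	i += BPF_REG_SIZE - 1;
	continue;
}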
> +
> +		/* load of stack value with all ZERO slots produces scalar value 0,
> +		 * call regsafe to ensure scalar ids are compared and precision
> +		 * flags are taken into account.
> +		 */
> +		if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +		    is_stack_zero64(&cur->stack[spi])) {
> +			if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
> +		if (is_stack_zero64(&old->stack[spi]) &&
> +		    is_spilled_scalar_reg64(&cur->stack[spi])) {
> +			if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +				     idmap, exact))
> +				return false;
> +			i += BPF_REG_SIZE - 1;
> +			continue;
> +		}
> +
>  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>  			continue;
>
> --
> 2.43.0
>