[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzZm7eUX3w-NwP0JuWtvKbO6GxN911TraY5bA8-z+ocyCg@mail.gmail.com>
Date: Wed, 24 Aug 2022 11:27:30 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Joanne Koong <joannelkoong@...il.com>
Cc: bpf@...r.kernel.org, andrii@...nel.org, daniel@...earbox.net,
ast@...nel.org, kafai@...com, kuba@...nel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH bpf-next v4 1/3] bpf: Add skb dynptrs
On Mon, Aug 22, 2022 at 4:57 PM Joanne Koong <joannelkoong@...il.com> wrote:
>
> Add skb dynptrs, which are dynptrs whose underlying pointer points
> to a skb. The dynptr acts on skb data. skb dynptrs have two main
> benefits. One is that they allow operations on sizes that are not
> statically known at compile-time (eg variable-sized accesses).
> Another is that parsing the packet data through dynptrs (instead of
> through direct access of skb->data and skb->data_end) can be more
> ergonomic and less brittle (eg does not need manual if checking for
> being within bounds of data_end).
>
> For bpf prog types that don't support writes on skb data, the dynptr is
> read-only. For reads and writes through the bpf_dynptr_read() and
> bpf_dynptr_write() interfaces, this supports reading and writing into
> data in the non-linear paged buffers. For data slices (through the
> bpf_dynptr_data() interface), if the data is in a paged buffer, the user
> must first call bpf_skb_pull_data() to pull the data into the linear
> portion. The returned data slice from a call to bpf_dynptr_data() is of
> reg type PTR_TO_PACKET | PTR_MAYBE_NULL.
>
> Any bpf_dynptr_write() automatically invalidates any prior data slices
> to the skb dynptr. This is because a bpf_dynptr_write() may be writing
> to data in a paged buffer, so it will need to pull the buffer first into
> the head. The reason it needs to be pulled instead of writing directly to
> the paged buffers is because they may be cloned (only the head of the skb
> is by default uncloned). As such, any bpf_dynptr_write() will
> automatically have its prior data slices invalidated, even if the write
> is to data in the skb head (the verifier has no way of differentiating
> whether the write is to the head or paged buffers during program load
> time). Please note as well that any other helper calls that change the
> underlying packet buffer (eg bpf_skb_pull_data()) invalidates any data
> slices of the skb dynptr as well. Whenever such a helper call is made,
> the verifier marks any PTR_TO_PACKET reg type (which includes skb dynptr
> slices since they are PTR_TO_PACKETs) as unknown. The stack trace for
> this is check_helper_call() -> clear_all_pkt_pointers() ->
> __clear_all_pkt_pointers() -> mark_reg_unknown()
>
> For examples of how skb dynptrs can be used, please see the attached
> selftests.
>
> Signed-off-by: Joanne Koong <joannelkoong@...il.com>
> ---
> include/linux/bpf.h | 83 +++++++++++++++++-----------
> include/linux/filter.h | 4 ++
> include/uapi/linux/bpf.h | 40 ++++++++++++--
> kernel/bpf/helpers.c | 81 +++++++++++++++++++++++++---
> kernel/bpf/verifier.c | 99 ++++++++++++++++++++++++++++------
> net/core/filter.c | 53 ++++++++++++++++--
> tools/include/uapi/linux/bpf.h | 40 ++++++++++++--
> 7 files changed, 335 insertions(+), 65 deletions(-)
>
[...]
> @@ -1521,9 +1532,19 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
> if (err)
> return err;
>
> - memcpy(dst, src->data + src->offset + offset, len);
> + type = bpf_dynptr_get_type(src);
>
> - return 0;
> + switch (type) {
> + case BPF_DYNPTR_TYPE_LOCAL:
> + case BPF_DYNPTR_TYPE_RINGBUF:
> + memcpy(dst, src->data + src->offset + offset, len);
> + return 0;
> + case BPF_DYNPTR_TYPE_SKB:
> + return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
> + default:
> + WARN(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
nit: probably better to WARN_ONCE?
> + return -EFAULT;
> + }
> }
>
> static const struct bpf_func_proto bpf_dynptr_read_proto = {
> @@ -1540,18 +1561,32 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
> BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
> u32, len, u64, flags)
> {
> + enum bpf_dynptr_type type;
> int err;
>
> - if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
> + if (!dst->data || bpf_dynptr_is_rdonly(dst))
> return -EINVAL;
>
> err = bpf_dynptr_check_off_len(dst, offset, len);
> if (err)
> return err;
>
> - memcpy(dst->data + dst->offset + offset, src, len);
> + type = bpf_dynptr_get_type(dst);
>
> - return 0;
> + switch (type) {
> + case BPF_DYNPTR_TYPE_LOCAL:
> + case BPF_DYNPTR_TYPE_RINGBUF:
> + if (flags)
> + return -EINVAL;
> + memcpy(dst->data + dst->offset + offset, src, len);
> + return 0;
> + case BPF_DYNPTR_TYPE_SKB:
> + return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
> + flags);
> + default:
> + WARN(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
ditto about WARN_ONCE
> + return -EFAULT;
> + }
> }
>
> static const struct bpf_func_proto bpf_dynptr_write_proto = {
[...]
> +static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
> + struct bpf_reg_state *reg,
> + int *ref_obj_id)
> {
> struct bpf_func_state *state = func(env, reg);
> int spi = get_spi(reg->off);
>
> - return state->stack[spi].spilled_ptr.id;
> + if (ref_obj_id)
> + *ref_obj_id = state->stack[spi].spilled_ptr.id;
> +
> + return state->stack[spi].spilled_ptr.dynptr.type;
> }
>
> static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> @@ -6056,7 +6075,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> case DYNPTR_TYPE_RINGBUF:
> err_extra = "ringbuf ";
> break;
> - default:
> + case DYNPTR_TYPE_SKB:
> + err_extra = "skb ";
> break;
> }
>
> @@ -7149,6 +7169,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
> {
> enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
> const struct bpf_func_proto *fn = NULL;
> + enum bpf_dynptr_type dynptr_type;
compiler technically can complain about use of uninitialized
dynptr_type, maybe initialize it to BPF_DYNPTR_TYPE_INVALID ?
> enum bpf_return_type ret_type;
> enum bpf_type_flag ret_flag;
> struct bpf_reg_state *regs;
> @@ -7320,24 +7341,43 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
> }
> }
> break;
> - case BPF_FUNC_dynptr_data:
> - for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
> - if (arg_type_is_dynptr(fn->arg_type[i])) {
> - if (meta.ref_obj_id) {
> - verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
> - return -EFAULT;
> - }
> - /* Find the id of the dynptr we're tracking the reference of */
> - meta.ref_obj_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]);
> - break;
> - }
> + case BPF_FUNC_dynptr_write:
> + {
> + struct bpf_reg_state *reg;
> +
> + reg = get_dynptr_arg_reg(fn, regs);
> + if (!reg) {
> + verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
s/bpf_dynptr_data/bpf_dynptr_write/
> + return -EFAULT;
> }
> - if (i == MAX_BPF_FUNC_REG_ARGS) {
> +
> + /* bpf_dynptr_write() for skb-type dynptrs may pull the skb, so we must
> + * invalidate all data slices associated with it
> + */
> + if (stack_slot_get_dynptr_info(env, reg, NULL) == BPF_DYNPTR_TYPE_SKB)
> + changes_data = true;
> +
> + break;
> + }
> + case BPF_FUNC_dynptr_data:
> + {
> + struct bpf_reg_state *reg;
> +
> + reg = get_dynptr_arg_reg(fn, regs);
> + if (!reg) {
> verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
> return -EFAULT;
> }
> +
> + if (meta.ref_obj_id) {
> + verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
> + return -EFAULT;
> + }
> +
> + dynptr_type = stack_slot_get_dynptr_info(env, reg, &meta.ref_obj_id);
> break;
> }
> + }
>
> if (err)
> return err;
> @@ -7397,8 +7437,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
> break;
> case RET_PTR_TO_ALLOC_MEM:
> mark_reg_known_zero(env, regs, BPF_REG_0);
> - regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
> - regs[BPF_REG_0].mem_size = meta.mem_size;
> +
> + if (func_id == BPF_FUNC_dynptr_data &&
> + dynptr_type == BPF_DYNPTR_TYPE_SKB) {
> + regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
> + regs[BPF_REG_0].range = meta.mem_size;
> + } else {
> + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
> + regs[BPF_REG_0].mem_size = meta.mem_size;
> + }
> break;
> case RET_PTR_TO_MEM_OR_BTF_ID:
> {
> @@ -14141,6 +14188,24 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> goto patch_call_imm;
> }
>
> + if (insn->imm == BPF_FUNC_dynptr_from_skb) {
> + bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
> +
> + insn_buf[0] = BPF_MOV32_IMM(BPF_REG_4, is_rdonly);
> + insn_buf[1] = *insn;
> + cnt = 2;
This might have been discussed before, sorry if I missed it. But why
this special rewrite to provide read-only flag vs having two
implementations of bpf_dynptr_from_skb() depending on program types?
If program type allows read+write access, return
bpf_dynptr_from_skb_rdwr(), for those that have read-only access -
bpf_dynptr_from_skb_rdonly(), and for others return NULL proto
(disable helper)?
You can then use it very similarly for bpf_dynptr_from_skb_meta().
> +
> + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> + if (!new_prog)
> + return -ENOMEM;
> +
> + delta += cnt - 1;
> + env->prog = new_prog;
> + prog = new_prog;
> + insn = new_prog->insnsi + i + delta;
> + goto patch_call_imm;
> + }
> +
[...]
> BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
> {
> return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
> @@ -7726,6 +7763,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_socket_uid_proto;
> case BPF_FUNC_perf_event_output:
> return &bpf_skb_event_output_proto;
> + case BPF_FUNC_dynptr_from_skb:
> + return &bpf_dynptr_from_skb_proto;
> default:
> return bpf_sk_base_func_proto(func_id);
> }
> @@ -7909,6 +7948,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_tcp_raw_check_syncookie_ipv6_proto;
> #endif
> #endif
> + case BPF_FUNC_dynptr_from_skb:
> + return &bpf_dynptr_from_skb_proto;
> default:
> return bpf_sk_base_func_proto(func_id);
> }
> @@ -8104,6 +8145,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> case BPF_FUNC_skc_lookup_tcp:
> return &bpf_skc_lookup_tcp_proto;
> #endif
> + case BPF_FUNC_dynptr_from_skb:
> + return &bpf_dynptr_from_skb_proto;
> default:
> return bpf_sk_base_func_proto(func_id);
> }
> @@ -8142,6 +8185,8 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_smp_processor_id_proto;
> case BPF_FUNC_skb_under_cgroup:
> return &bpf_skb_under_cgroup_proto;
> + case BPF_FUNC_dynptr_from_skb:
> + return &bpf_dynptr_from_skb_proto;
so like here you'd return read-only prototype for dynptr_from_skb
(seems like LWT programs have only read-only access, according to
may_access_direct_pkt_data), but in cases where
may_access_direct_pkt_data() allows read-write access we'd choose rdwr
variant of dynptr_from_skb?
> default:
> return bpf_sk_base_func_proto(func_id);
> }
[...]
Powered by blists - more mailing lists