[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180515023718.3zluffqkf52buc25@ast-mbp>
Date: Mon, 14 May 2018 19:37:20 -0700
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Joe Stringer <joe@...d.net.nz>
Cc: daniel@...earbox.net, netdev@...r.kernel.org, ast@...nel.org,
john.fastabend@...il.com, kafai@...com
Subject: Re: [RFC bpf-next 04/11] bpf: Add PTR_TO_SOCKET verifier type
On Wed, May 09, 2018 at 02:07:02PM -0700, Joe Stringer wrote:
> Teach the verifier a little bit about a new type of pointer, a
> PTR_TO_SOCKET. This pointer type is accessed from BPF through the
> 'struct bpf_sock' structure.
>
> Signed-off-by: Joe Stringer <joe@...d.net.nz>
> ---
> include/linux/bpf.h | 19 +++++++++-
> include/linux/bpf_verifier.h | 2 ++
> kernel/bpf/verifier.c | 86 ++++++++++++++++++++++++++++++++++++++------
> net/core/filter.c | 30 +++++++++-------
> 4 files changed, 114 insertions(+), 23 deletions(-)
Ack for patches 1-3. In this one few nits:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a38e474bf7ee..a03b4b0edcb6 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -136,7 +136,7 @@ enum bpf_arg_type {
> /* the following constraints used to prototype bpf_memcmp() and other
> * functions that access data on eBPF program stack
> */
> - ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
> + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value, socket) */
I don't see where in this patch this change happens...
> ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
> ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized,
> * helper function must fill all bytes or clear
> @@ -148,6 +148,7 @@ enum bpf_arg_type {
>
> ARG_PTR_TO_CTX, /* pointer to context */
> ARG_ANYTHING, /* any (initialized) argument is ok */
> + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
> };
>
> /* type of values returned from helper functions */
> @@ -155,6 +156,7 @@ enum bpf_return_type {
> RET_INTEGER, /* function returns integer */
> RET_VOID, /* function doesn't return anything */
> RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
> + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
> };
>
> /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
> @@ -205,6 +207,8 @@ enum bpf_reg_type {
> PTR_TO_PACKET_META, /* skb->data - meta_len */
> PTR_TO_PACKET, /* reg points to skb->data */
> PTR_TO_PACKET_END, /* skb->data + headlen */
> + PTR_TO_SOCKET, /* reg points to struct bpf_sock */
> + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
> };
>
> /* The information passed from prog-specific *_is_valid_access
> @@ -326,6 +330,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
>
> typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
> unsigned long off, unsigned long len);
> +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
> + const struct bpf_insn *src,
> + struct bpf_insn *dst,
> + struct bpf_prog *prog,
> + u32 *target_size);
>
> u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
> void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
> @@ -729,4 +738,12 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
> void bpf_user_rnd_init_once(void);
> u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>
> +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
> + struct bpf_insn_access_aux *info);
> +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
> + const struct bpf_insn *si,
> + struct bpf_insn *insn_buf,
> + struct bpf_prog *prog,
> + u32 *target_size);
> +
> #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index a613b52ce939..9dcd87f1d322 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -57,6 +57,8 @@ struct bpf_reg_state {
> * offset, so they can share range knowledge.
> * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
> * came from, when one is tested for != NULL.
> + * For PTR_TO_SOCKET this is used to share which pointers retain the
> + * same reference to the socket, to determine proper reference freeing.
> */
> u32 id;
> /* Ordering of fields matters. See states_equal() */
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 1b31b805dea4..d38c7c1e9da6 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
> * (like pointer plus pointer becomes SCALAR_VALUE type)
> *
> * When verifier sees load or store instructions the type of base register
> - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
> - * types recognized by check_mem_access() function.
> + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
> + * four pointer types recognized by check_mem_access() function.
> *
> * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
> * and the range of [ptr, ptr + map's value_size) is accessible.
> @@ -244,6 +244,8 @@ static const char * const reg_type_str[] = {
> [PTR_TO_PACKET] = "pkt",
> [PTR_TO_PACKET_META] = "pkt_meta",
> [PTR_TO_PACKET_END] = "pkt_end",
> + [PTR_TO_SOCKET] = "sock",
> + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
> };
>
> static void print_liveness(struct bpf_verifier_env *env,
> @@ -977,6 +979,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
> case PTR_TO_PACKET_META:
> case PTR_TO_PACKET_END:
> case CONST_PTR_TO_MAP:
> + case PTR_TO_SOCKET:
> + case PTR_TO_SOCKET_OR_NULL:
> return true;
> default:
> return false;
> @@ -1360,6 +1364,28 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
> return -EACCES;
> }
>
> +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
> + int size, enum bpf_access_type t)
> +{
> + struct bpf_reg_state *regs = cur_regs(env);
> + struct bpf_reg_state *reg = ®s[regno];
> + struct bpf_insn_access_aux info;
> +
> + if (reg->smin_value < 0) {
> + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
> + regno);
> + return -EACCES;
> + }
> +
> + if (!bpf_sock_is_valid_access(off, size, t, &info)) {
> + verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n",
> + off, size);
> + return -EACCES;
> + }
> +
> + return 0;
> +}
> +
> static bool __is_pointer_value(bool allow_ptr_leaks,
> const struct bpf_reg_state *reg)
> {
> @@ -1475,6 +1501,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
> */
> strict = true;
> break;
> + case PTR_TO_SOCKET:
> + pointer_desc = "sock ";
> + break;
> default:
> break;
> }
> @@ -1723,6 +1752,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> err = check_packet_access(env, regno, off, size, false);
> if (!err && t == BPF_READ && value_regno >= 0)
> mark_reg_unknown(env, regs, value_regno);
> +
> + } else if (reg->type == PTR_TO_SOCKET) {
> + if (t == BPF_WRITE) {
> + verbose(env, "cannot write into socket\n");
> + return -EACCES;
> + }
> + err = check_sock_access(env, regno, off, size, t);
> + if (!err && t == BPF_READ && value_regno >= 0)
t == BPF_READ check is unnecessary.
> + mark_reg_unknown(env, regs, value_regno);
> +
> } else {
> verbose(env, "R%d invalid mem access '%s'\n", regno,
> reg_type_str[reg->type]);
> @@ -1941,6 +1980,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
> expected_type = PTR_TO_CTX;
> if (type != expected_type)
> goto err_type;
> + } else if (arg_type == ARG_PTR_TO_SOCKET) {
> + expected_type = PTR_TO_SOCKET;
> + if (type != expected_type)
> + goto err_type;
> } else if (arg_type_is_mem_ptr(arg_type)) {
> expected_type = PTR_TO_STACK;
> /* One exception here. In case function allows for NULL to be
> @@ -2477,6 +2520,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
> insn_aux->map_ptr = meta.map_ptr;
> else if (insn_aux->map_ptr != meta.map_ptr)
> insn_aux->map_ptr = BPF_MAP_PTR_POISON;
> + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
> + mark_reg_known_zero(env, regs, BPF_REG_0);
> + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
> + regs[BPF_REG_0].id = ++env->id_gen;
> } else {
> verbose(env, "unknown return type %d of func %s#%d\n",
> fn->ret_type, func_id_name(func_id), func_id);
> @@ -2614,6 +2661,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
> return -EACCES;
> case CONST_PTR_TO_MAP:
> case PTR_TO_PACKET_END:
> + case PTR_TO_SOCKET:
> + case PTR_TO_SOCKET_OR_NULL:
> verbose(env, "R%d pointer arithmetic on %s prohibited\n",
> dst, reg_type_str[ptr_reg->type]);
> return -EACCES;
> @@ -3559,6 +3608,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
> } else {
> reg->type = PTR_TO_MAP_VALUE;
> }
> + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
> + reg->type = PTR_TO_SOCKET;
> }
> /* We don't need id from this point onwards anymore, thus we
> * should better reset it, so that state pruning has chances
> @@ -4333,6 +4384,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
> case PTR_TO_CTX:
> case CONST_PTR_TO_MAP:
> case PTR_TO_PACKET_END:
> + case PTR_TO_SOCKET:
> + case PTR_TO_SOCKET_OR_NULL:
> /* Only valid matches are exact, which memcmp() above
> * would have accepted
> */
> @@ -5188,10 +5241,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
> }
> }
>
> -/* convert load instructions that access fields of 'struct __sk_buff'
> - * into sequence of instructions that access fields of 'struct sk_buff'
> +/* convert load instructions that access fields of a context type into a
> + * sequence of instructions that access fields of the underlying structure:
> + * struct __sk_buff -> struct sk_buff
> + * struct bpf_sock_ops -> struct sock
> */
> -static int convert_ctx_accesses(struct bpf_verifier_env *env)
> +static int convert_ctx_accesses(struct bpf_verifier_env *env,
> + bpf_convert_ctx_access_t convert_ctx_access,
> + enum bpf_reg_type ctx_type)
> {
> const struct bpf_verifier_ops *ops = env->ops;
> int i, cnt, size, ctx_field_size, delta = 0;
> @@ -5218,12 +5275,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> }
> }
>
> - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> + if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> return 0;
>
> insn = env->prog->insnsi + delta;
>
> for (i = 0; i < insn_cnt; i++, insn++) {
> + enum bpf_reg_type ptr_type;
> +
> if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
> insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
> insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
> @@ -5237,7 +5296,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> else
> continue;
>
> - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
> + ptr_type = env->insn_aux_data[i + delta].ptr_type;
> + if (ptr_type != ctx_type)
> continue;
>
> ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
> @@ -5269,8 +5329,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> }
>
> target_size = 0;
> - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
> - &target_size);
> + cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
> + &target_size);
> if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
> (ctx_field_size && !target_size)) {
> verbose(env, "bpf verifier is misconfigured\n");
> @@ -5785,7 +5845,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
>
> if (ret == 0)
> /* program is valid, convert *(u32*)(ctx + off) accesses */
> - ret = convert_ctx_accesses(env);
> + ret = convert_ctx_accesses(env, env->ops->convert_ctx_access,
> + PTR_TO_CTX);
> +
> + if (ret == 0)
> + /* Convert *(u32*)(sock_ops + off) accesses */
> + ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access,
> + PTR_TO_SOCKET);
Overall looks great.
Only this part is missing for PTR_TO_SOCKET:
} else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
*prev_dst_type == PTR_TO_CTX)) {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
similar logic has to be added.
Otherwise the following will be accepted:
R1 = sock_ptr
goto X;
...
R1 = some_other_valid_ptr;
goto X;
...
R2 = *(u32 *)(R1 + 0);
this will be rewritten for first branch,
but it's wrong for second.
Powered by blists - more mailing lists