[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJ+HfNizsTgxTvtyUezQkqAuoWr7txYNCuDTeWgy4UTR2hbr8g@mail.gmail.com>
Date: Wed, 5 Jun 2019 19:16:51 +0200
From: Björn Töpel <bjorn.topel@...il.com>
To: Jonathan Lemon <jonathan.lemon@...il.com>
Cc: Björn Töpel <bjorn.topel@...el.com>,
"Karlsson, Magnus" <magnus.karlsson@...el.com>,
Toke Høiland-Jørgensen <toke@...hat.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Kernel Team <kernel-team@...com>,
Netdev <netdev@...r.kernel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Alexei Starovoitov <ast@...nel.org>
Subject: Re: [PATCH 1/1] bpf: Allow bpf_map_lookup_elem() on an xskmap
On Wed, 5 Jun 2019 at 17:58, Jonathan Lemon <jonathan.lemon@...il.com> wrote:
>
> Currently, the AF_XDP code uses a separate map in order to
> determine if an xsk is bound to a queue. Instead of doing this,
> have bpf_map_lookup_elem() return a xdp_sock.
>
> Rearrange some xdp_sock members to eliminate structure holes.
>
> Signed-off-by: Jonathan Lemon <jonathan.lemon@...il.com>
> ---
> include/linux/bpf.h | 8 ++++
> include/net/xdp_sock.h | 4 +-
> include/uapi/linux/bpf.h | 4 ++
> kernel/bpf/verifier.c | 26 +++++++++++-
> kernel/bpf/xskmap.c | 7 ++++
> net/core/filter.c | 40 +++++++++++++++++++
> tools/include/uapi/linux/bpf.h | 4 ++
> .../bpf/verifier/prevent_map_lookup.c | 15 -------
> tools/testing/selftests/bpf/verifier/sock.c | 18 +++++++++
> 9 files changed, 107 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index ff3e00ff84d2..66b175bdc645 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -276,6 +276,7 @@ enum bpf_reg_type {
> PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
> PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
> PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
> + PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
> };
>
> /* The information passed from prog-specific *_is_valid_access
> @@ -670,6 +671,13 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
> void __cpu_map_flush(struct bpf_map *map);
> int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
> struct net_device *dev_rx);
> +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
> + struct bpf_insn_access_aux *info);
> +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
> + const struct bpf_insn *si,
> + struct bpf_insn *insn_buf,
> + struct bpf_prog *prog,
> + u32 *target_size);
>
> /* Return map's numa specified by userspace */
> static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> index d074b6d60f8a..ae0f368a62bb 100644
> --- a/include/net/xdp_sock.h
> +++ b/include/net/xdp_sock.h
> @@ -58,11 +58,11 @@ struct xdp_sock {
> struct xdp_umem *umem;
> struct list_head flush_node;
> u16 queue_id;
> - struct xsk_queue *tx ____cacheline_aligned_in_smp;
> - struct list_head list;
> bool zc;
> /* Protects multiple processes in the control path */
> struct mutex mutex;
> + struct xsk_queue *tx ____cacheline_aligned_in_smp;
> + struct list_head list;
> /* Mutual exclusion of NAPI TX thread and sendmsg error paths
> * in the SKB destructor callback.
> */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 7c6aef253173..ae0907d8c03a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
> };
> };
>
> +struct bpf_xdp_sock {
> + __u32 queue_id;
> +};
> +
> #define XDP_PACKET_HEADROOM 256
>
> /* User return codes for XDP prog type.
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2778417e6e0c..abe177405989 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -334,7 +334,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
> {
> return type == PTR_TO_SOCKET ||
> type == PTR_TO_SOCK_COMMON ||
> - type == PTR_TO_TCP_SOCK;
> + type == PTR_TO_TCP_SOCK ||
> + type == PTR_TO_XDP_SOCK;
> }
>
> static bool reg_type_may_be_null(enum bpf_reg_type type)
> @@ -406,6 +407,7 @@ static const char * const reg_type_str[] = {
> [PTR_TO_TCP_SOCK] = "tcp_sock",
> [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
> [PTR_TO_TP_BUFFER] = "tp_buffer",
> + [PTR_TO_XDP_SOCK] = "xdp_sock",
> };
>
> static char slot_type_char[] = {
> @@ -1363,6 +1365,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
> case PTR_TO_SOCK_COMMON_OR_NULL:
> case PTR_TO_TCP_SOCK:
> case PTR_TO_TCP_SOCK_OR_NULL:
> + case PTR_TO_XDP_SOCK:
> return true;
> default:
> return false;
> @@ -1843,6 +1846,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
> case PTR_TO_TCP_SOCK:
> valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
> break;
> + case PTR_TO_XDP_SOCK:
> + valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
> + break;
> default:
> valid = false;
> }
> @@ -2007,6 +2013,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
> case PTR_TO_TCP_SOCK:
> pointer_desc = "tcp_sock ";
> break;
> + case PTR_TO_XDP_SOCK:
> + pointer_desc = "xdp_sock ";
> + break;
> default:
> break;
> }
> @@ -2905,10 +2914,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
> * appear.
> */
> case BPF_MAP_TYPE_CPUMAP:
> - case BPF_MAP_TYPE_XSKMAP:
> if (func_id != BPF_FUNC_redirect_map)
> goto error;
> break;
> + case BPF_MAP_TYPE_XSKMAP:
> + if (func_id != BPF_FUNC_redirect_map &&
> + func_id != BPF_FUNC_map_lookup_elem)
> + goto error;
> + break;
> case BPF_MAP_TYPE_ARRAY_OF_MAPS:
> case BPF_MAP_TYPE_HASH_OF_MAPS:
> if (func_id != BPF_FUNC_map_lookup_elem)
> @@ -3799,6 +3812,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
> case PTR_TO_SOCK_COMMON_OR_NULL:
> case PTR_TO_TCP_SOCK:
> case PTR_TO_TCP_SOCK_OR_NULL:
> + case PTR_TO_XDP_SOCK:
> verbose(env, "R%d pointer arithmetic on %s prohibited\n",
> dst, reg_type_str[ptr_reg->type]);
> return -EACCES;
> @@ -5038,6 +5052,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
> if (reg->map_ptr->inner_map_meta) {
> reg->type = CONST_PTR_TO_MAP;
> reg->map_ptr = reg->map_ptr->inner_map_meta;
> + } else if (reg->map_ptr->map_type ==
> + BPF_MAP_TYPE_XSKMAP) {
> + reg->type = PTR_TO_XDP_SOCK;
> } else {
> reg->type = PTR_TO_MAP_VALUE;
> }
> @@ -6289,6 +6306,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
> case PTR_TO_SOCK_COMMON_OR_NULL:
> case PTR_TO_TCP_SOCK:
> case PTR_TO_TCP_SOCK_OR_NULL:
> + case PTR_TO_XDP_SOCK:
> /* Only valid matches are exact, which memcmp() above
> * would have accepted
> */
> @@ -6683,6 +6701,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
> case PTR_TO_SOCK_COMMON_OR_NULL:
> case PTR_TO_TCP_SOCK:
> case PTR_TO_TCP_SOCK_OR_NULL:
> + case PTR_TO_XDP_SOCK:
> return false;
> default:
> return true;
> @@ -7816,6 +7835,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> case PTR_TO_TCP_SOCK:
> convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
> break;
> + case PTR_TO_XDP_SOCK:
> + convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
> + break;
> default:
> continue;
> }
> diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
> index 686d244e798d..b5c58e9c4835 100644
> --- a/kernel/bpf/xskmap.c
> +++ b/kernel/bpf/xskmap.c
> @@ -153,6 +153,12 @@ void __xsk_map_flush(struct bpf_map *map)
> }
>
> static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> + WARN_ON_ONCE(!rcu_read_lock_held());
> + return __xsk_map_lookup_elem(map, *(u32 *)key);
> +}
> +
> +static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
> {
> return ERR_PTR(-EOPNOTSUPP);
> }
> @@ -220,6 +226,7 @@ const struct bpf_map_ops xsk_map_ops = {
> .map_free = xsk_map_free,
> .map_get_next_key = xsk_map_get_next_key,
> .map_lookup_elem = xsk_map_lookup_elem,
> + .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
The sys_only was news to time! Nice!
> .map_update_elem = xsk_map_update_elem,
> .map_delete_elem = xsk_map_delete_elem,
> .map_check_btf = map_check_no_btf,
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 55bfc941d17a..f2d9d77b4b57 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5680,6 +5680,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
> return INET_ECN_set_ce(skb);
> }
>
> +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
> + struct bpf_insn_access_aux *info)
> +{
> + if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
> + return false;
> +
> + if (off % size != 0)
> + return false;
> +
> + switch (off) {
> + default:
> + return size == sizeof(__u32);
> + }
Hmm? Missing case or remove?
Björn
> +}
> +
> +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
> + const struct bpf_insn *si,
> + struct bpf_insn *insn_buf,
> + struct bpf_prog *prog, u32 *target_size)
> +{
> + struct bpf_insn *insn = insn_buf;
> +
> +#define BPF_XDP_SOCK_GET(FIELD) \
> + do { \
> + BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
> + FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
> + si->dst_reg, si->src_reg, \
> + offsetof(struct xdp_sock, FIELD)); \
> + } while (0)
> +
> + switch (si->off) {
> + case offsetof(struct bpf_xdp_sock, queue_id):
> + BPF_XDP_SOCK_GET(queue_id);
> + break;
> + }
> +
> + return insn - insn_buf;
> +}
> +
> static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
> .func = bpf_skb_ecn_set_ce,
> .gpl_only = false,
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 7c6aef253173..ae0907d8c03a 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
> };
> };
>
> +struct bpf_xdp_sock {
> + __u32 queue_id;
> +};
> +
> #define XDP_PACKET_HEADROOM 256
>
> /* User return codes for XDP prog type.
> diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
> index bbdba990fefb..da7a4b37cb98 100644
> --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
> +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
> @@ -28,21 +28,6 @@
> .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
> .prog_type = BPF_PROG_TYPE_SOCK_OPS,
> },
> -{
> - "prevent map lookup in xskmap",
> - .insns = {
> - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
> - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> - BPF_LD_MAP_FD(BPF_REG_1, 0),
> - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
> - BPF_EXIT_INSN(),
> - },
> - .fixup_map_xskmap = { 3 },
> - .result = REJECT,
> - .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
> - .prog_type = BPF_PROG_TYPE_XDP,
> -},
> {
> "prevent map lookup in stack trace",
> .insns = {
> diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
> index b31cd2cf50d0..9ed192e14f5f 100644
> --- a/tools/testing/selftests/bpf/verifier/sock.c
> +++ b/tools/testing/selftests/bpf/verifier/sock.c
> @@ -498,3 +498,21 @@
> .result = REJECT,
> .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
> },
> +{
> + "bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
> + .insns = {
> + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
> + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> + BPF_LD_MAP_FD(BPF_REG_1, 0),
> + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
> + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
> + BPF_EXIT_INSN(),
> + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
> + BPF_MOV64_IMM(BPF_REG_0, 0),
> + BPF_EXIT_INSN(),
> + },
> + .fixup_map_xskmap = { 3 },
> + .prog_type = BPF_PROG_TYPE_XDP,
> + .result = ACCEPT,
> +},
> --
> 2.17.1
>
Powered by blists - more mailing lists