Message-ID: <cc2e3011-0309-842c-4dbc-d3e82d5fc2d0@iogearbox.net>
Date: Tue, 15 May 2018 21:01:38 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: John Fastabend <john.fastabend@...il.com>, ast@...nel.org
Cc: netdev@...r.kernel.org, davem@...emloft.net
Subject: Re: [PATCH bpf-next v6 2/4] bpf: sockmap, add hash map support
On 05/14/2018 07:00 PM, John Fastabend wrote:
> Sockmap is currently backed by an array and enforces keys to be
> four bytes. This works well for many use cases and was originally
> modeled after devmap, which also uses four-byte keys. However,
> this has become limiting in larger use cases where a hash would
> be more appropriate. For example, users may want to use the 5-tuple
> of the socket as the lookup key.
>
> To support this, add hash support.
>
> Signed-off-by: John Fastabend <john.fastabend@...il.com>
> Acked-by: David S. Miller <davem@...emloft.net>
> ---
> include/linux/bpf.h | 8 +
> include/linux/bpf_types.h | 1 +
> include/uapi/linux/bpf.h | 52 ++++-
> kernel/bpf/core.c | 1 +
> kernel/bpf/sockmap.c | 494 ++++++++++++++++++++++++++++++++++++++++++++--
> kernel/bpf/verifier.c | 14 +-
> net/core/filter.c | 58 ++++++
> 7 files changed, 610 insertions(+), 18 deletions(-)
>
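As a side note for readers, a minimal sketch of the 5-tuple use case from
the commit message could look like the BPF C snippet below. The struct
layout and the sock_key/sock_hash names are illustrative only and not part
of this patch:

  /* Hypothetical 5-tuple key: the key may be any fixed-size struct,
   * since sock_hash_alloc() further down only pins the value size.
   */
  struct sock_key {
          __u32 sip4;     /* source IPv4 address, network byte order */
          __u32 dip4;     /* destination IPv4 address, network byte order */
          __u16 sport;    /* source port, host byte order */
          __u16 dport;    /* destination port, host byte order */
          __u8  proto;    /* e.g. IPPROTO_TCP */
  };

  struct bpf_map_def SEC("maps") sock_hash = {
          .type        = BPF_MAP_TYPE_SOCKHASH,
          .key_size    = sizeof(struct sock_key),
          .value_size  = sizeof(__u32),  /* must be 4, see sock_hash_alloc() */
          .max_entries = 65536,
  };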
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a38e474..ed0122b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -668,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
>
> #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
> struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
> +struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
> int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
> #else
> static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
> @@ -675,6 +676,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
> return NULL;
> }
>
> +static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map,
> + void *key)
> +{
> + return NULL;
> +}
> +
> static inline int sock_map_prog(struct bpf_map *map,
> struct bpf_prog *prog,
> u32 type)
> @@ -724,6 +731,7 @@ static inline void __xsk_map_flush(struct bpf_map *map)
> extern const struct bpf_func_proto bpf_get_stackid_proto;
> extern const struct bpf_func_proto bpf_get_stack_proto;
> extern const struct bpf_func_proto bpf_sock_map_update_proto;
> +extern const struct bpf_func_proto bpf_sock_hash_update_proto;
>
> /* Shared helpers among cBPF and eBPF. */
> void bpf_user_rnd_init_once(void);
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index d7df1b32..b67f879 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -47,6 +47,7 @@
> BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
> #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
> BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
> #endif
> BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
> #if defined(CONFIG_XDP_SOCKETS)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 02e4112..1205d86 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -118,6 +118,7 @@ enum bpf_map_type {
> BPF_MAP_TYPE_SOCKMAP,
> BPF_MAP_TYPE_CPUMAP,
> BPF_MAP_TYPE_XSKMAP,
> + BPF_MAP_TYPE_SOCKHASH,
> };
>
> enum bpf_prog_type {
> @@ -1855,6 +1856,52 @@ struct bpf_stack_build_id {
> * Egress device index on success, 0 if packet needs to continue
> * up the stack for further processing or a negative error in case
> * of failure.
> + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
Next time you rebase, please fix this up properly and add a newline between
the helpers. I fixed it up while applying.
> + * Description
> + * Add an entry to, or update a sockhash *map* referencing sockets.
> + * The *skops* is used as a new value for the entry associated with
> + * *key*. *flags* is one of:
> + *
> + * **BPF_NOEXIST**
> + * The entry for *key* must not exist in the map.
> + * **BPF_EXIST**
> + * The entry for *key* must already exist in the map.
> + * **BPF_ANY**
> + * No condition on the existence of the entry for *key*.
> + *
> + * If the *map* has eBPF programs (parser and verdict), those will
> + * be inherited by the socket being added. If the socket is
> + * already attached to eBPF programs, this results in an error.
> + * Return
> + * 0 on success, or a negative error in case of failure.
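As a usage sketch (reusing the illustrative sock_key/sock_hash from above,
and assuming the usual bpf_helpers.h/bpf_endian.h definitions), the helper
would typically be called from a sockops program once a connection is
established:

  SEC("sockops")
  int add_sock_to_hash(struct bpf_sock_ops *skops)
  {
          struct sock_key key = {};

          switch (skops->op) {
          case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
          case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
                  key.sip4  = skops->local_ip4;
                  key.dip4  = skops->remote_ip4;
                  /* local_port is host byte order, remote_port is
                   * network byte order per struct bpf_sock_ops
                   */
                  key.sport = (__u16)skops->local_port;
                  key.dport = (__u16)bpf_ntohl(skops->remote_port);
                  key.proto = IPPROTO_TCP;
                  bpf_sock_hash_update(skops, &sock_hash, &key,
                                       BPF_NOEXIST);
                  break;
          }
          return 0;
  }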
[...]
> +static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
> +{
> + struct bpf_htab *htab;
> + int i, err;
> + u64 cost;
> +
> + if (!capable(CAP_NET_ADMIN))
> + return ERR_PTR(-EPERM);
> +
> + /* check sanity of attributes */
> + if (attr->max_entries == 0 || attr->value_size != 4 ||
> + attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
> + return ERR_PTR(-EINVAL);
> +
> + err = bpf_tcp_ulp_register();
> + if (err && err != -EEXIST)
> + return ERR_PTR(err);
> +
> + htab = kzalloc(sizeof(*htab), GFP_USER);
> + if (!htab)
> + return ERR_PTR(-ENOMEM);
> +
> + bpf_map_init_from_attr(&htab->map, attr);
> +
> + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
> + htab->elem_size = sizeof(struct htab_elem) +
> + round_up(htab->map.key_size, 8);
> +
> + if (htab->n_buckets == 0 ||
> + htab->n_buckets > U32_MAX / sizeof(struct bucket))
> + goto free_htab;
> +
> + cost = (u64) htab->n_buckets * sizeof(struct bucket) +
> + (u64) htab->elem_size * htab->map.max_entries;
> +
> + if (cost >= U32_MAX - PAGE_SIZE)
> + goto free_htab;
Also, the two goto free_htab statements above were buggy! The error after
bpf_tcp_ulp_register() is either 0 or -EEXIST; if it is 0, then bailing out
here returns ERR_PTR(0), which is NULL, and that causes a NULL pointer
dereference in the subsequent map setup paths. Adapting the same logic from
sock_map_alloc() would have been enough. I've added an err = -EINVAL before
the first occurrence above this time.
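For reference, the fixed error path looks roughly like this (the shape of
the fixup described above, not a verbatim quote of the applied patch):

  err = bpf_tcp_ulp_register();
  if (err && err != -EEXIST)
          return ERR_PTR(err);
  ...
  /* Reset err before the sanity-check bailouts below: otherwise err
   * may still be 0 from bpf_tcp_ulp_register(), and ERR_PTR(0) is
   * NULL, which passes the caller's IS_ERR() check and is then
   * dereferenced in the map setup paths.
   */
  err = -EINVAL;
  if (htab->n_buckets == 0 ||
      htab->n_buckets > U32_MAX / sizeof(struct bucket))
          goto free_htab;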
> + htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
> + err = bpf_map_precharge_memlock(htab->map.pages);
> + if (err)
> + goto free_htab;
> +
> + err = -ENOMEM;
> + htab->buckets = bpf_map_area_alloc(
> + htab->n_buckets * sizeof(struct bucket),
> + htab->map.numa_node);
> + if (!htab->buckets)
> + goto free_htab;
> +
> + for (i = 0; i < htab->n_buckets; i++) {
> + INIT_HLIST_HEAD(&htab->buckets[i].head);
> + raw_spin_lock_init(&htab->buckets[i].lock);
> + }
> +
> + return &htab->map;
> +free_htab:
> + kfree(htab);
> + return ERR_PTR(err);
> +}
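For completeness, a user space sketch of creating such a map, assuming
libbpf's bpf_create_map() and the illustrative sock_key from above; the
comments map directly to the checks in sock_hash_alloc():

  int map_fd;

  /* Requires CAP_NET_ADMIN, see the capable() check above. */
  map_fd = bpf_create_map(BPF_MAP_TYPE_SOCKHASH,
                          sizeof(struct sock_key), /* key size is free-form */
                          sizeof(__u32),           /* value_size must be 4 */
                          65536,                   /* max_entries > 0; buckets
                                                    * are rounded up to a
                                                    * power of two */
                          0 /* map_flags */);
  if (map_fd < 0)
          /* e.g. -EINVAL for value_size != 4 or max_entries == 0 */
          return -1;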