Message-Id: <Yboc/G18R1Vi1eQV@google.com>
Date: Wed, 15 Dec 2021 08:51:08 -0800
From: sdf@...gle.com
To: Pavel Begunkov <asml.silence@...il.com>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3] cgroup/bpf: fast path skb BPF filtering
On 12/15, Pavel Begunkov wrote:
> Add a per-socket fast path for the case where BPF skb filtering is not
> enabled, which sheds a nice chunk of send/recv overhead when it applies.
> Testing udp with a 128 byte payload and/or zerocopy with any payload size
> showed a 2-3% improvement in requests/s on the tx side using fast NICs
> across the network, and around 4% for a dummy device. The same goes for
> rx; not measured, but the numbers should be comparable.
>
> In my understanding, this should affect a good share of machines; at
> least it includes my laptops and several servers I checked.
>
> The core of the problem is that even though cgroup_bpf_enabled_key
> guards against __cgroup_bpf_run_filter_skb() overhead, there are cases
> where we have several cgroups, and loading a BPF program into one also
> makes all the others go through the slow path even when they don't have
> any BPF attached. It's even worse because apparently systemd, or some
> other early init, loads some BPF and so triggers exactly this situation
> for normal networking.
>
> Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
> ---
> v2: replace bitmask approach with empty_prog_array (suggested by Martin)
> v3: add "bpf_" prefix to empty_prog_array (Martin)
>  include/linux/bpf-cgroup.h | 24 +++++++++++++++++++++---
>  include/linux/bpf.h        | 13 +++++++++++++
>  kernel/bpf/cgroup.c        | 18 ++----------------
>  kernel/bpf/core.c          | 16 ++++------------
>  4 files changed, 40 insertions(+), 31 deletions(-)
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 11820a430d6c..c6dacdbdf565 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -219,11 +219,28 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  				     void *value, u64 flags);
> +static inline bool
> +__cgroup_bpf_prog_array_is_empty(struct cgroup_bpf *cgrp_bpf,
> +				 enum cgroup_bpf_attach_type type)
> +{
> +	struct bpf_prog_array *array = rcu_access_pointer(cgrp_bpf->effective[type]);
> +
> +	return array == &bpf_empty_prog_array.hdr;
> +}
> +
> +#define CGROUP_BPF_TYPE_ENABLED(sk, atype) \
> +({ \
> +	struct cgroup *__cgrp = sock_cgroup_ptr(&(sk)->sk_cgrp_data); \
> + \
> +	!__cgroup_bpf_prog_array_is_empty(&__cgrp->bpf, (atype)); \
> +})
> +
>  /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
>  #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
>  ({ \
>  	int __ret = 0; \
> -	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \
> +	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk && \
> +	    CGROUP_BPF_TYPE_ENABLED((sk), CGROUP_INET_INGRESS)) \
Why not add this check to __cgroup_bpf_run_filter_skb itself? The result
of sock_cgroup_ptr() is already there and you can use it. Maybe move
things around if you want it to happen earlier.
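Roughly something like this, as an untested sketch (where exactly the
early return lands relative to the existing checks is just an
illustration):

	int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb,
					enum cgroup_bpf_attach_type atype)
	{
		struct cgroup *cgrp;

		if (!sk || !sk_fullsock(sk))
			return 0;

		cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

		/* Bail out before doing any skb or locking work when no
		 * program is attached for this attach type.
		 */
		if (__cgroup_bpf_prog_array_is_empty(&cgrp->bpf, atype))
			return 0;

		/* ... rest of the existing function unchanged ... */
	}

That way every caller of __cgroup_bpf_run_filter_skb gets the fast path
without touching the macros.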
>  		__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
>  						    CGROUP_INET_INGRESS); \
>  \
> @@ -235,9 +252,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  	int __ret = 0; \
>  	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
>  		typeof(sk) __sk = sk_to_full_sk(sk); \
> -		if (sk_fullsock(__sk)) \
> +		if (sk_fullsock(__sk) && \
> +		    CGROUP_BPF_TYPE_ENABLED(__sk, CGROUP_INET_EGRESS)) \
>  			__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
> -							    CGROUP_INET_EGRESS); \
> +						      CGROUP_INET_EGRESS); \
>  	} \
>  	__ret; \
>  })
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index e7a163a3146b..0d2195c6fb2a 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1161,6 +1161,19 @@ struct bpf_prog_array {
>  	struct bpf_prog_array_item items[];
>  };
>
> +struct bpf_empty_prog_array {
> +	struct bpf_prog_array hdr;
> +	struct bpf_prog *null_prog;
> +};
> +
> +/* To avoid allocating an empty bpf_prog_array for cgroups that don't
> + * have a bpf program attached, use one global 'bpf_empty_prog_array'.
> + * It will not be modified by the caller of bpf_prog_array_alloc()
> + * (since the caller requested prog_cnt == 0), and that pointer should
> + * still be 'freed' by bpf_prog_array_free().
> + */
> +extern struct bpf_empty_prog_array bpf_empty_prog_array;
> +
>  struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
>  void bpf_prog_array_free(struct bpf_prog_array *progs);
>  int bpf_prog_array_length(struct bpf_prog_array *progs);
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 43eb3501721b..99e85f44e257 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1354,20 +1354,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
>  }
>
>  #ifdef CONFIG_NET
> -static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
> -					     enum cgroup_bpf_attach_type attach_type)
> -{
> -	struct bpf_prog_array *prog_array;
> -	bool empty;
> -
> -	rcu_read_lock();
> -	prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
> -	empty = bpf_prog_array_is_empty(prog_array);
> -	rcu_read_unlock();
> -
> -	return empty;
> -}
> -
>  static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
>  			     struct bpf_sockopt_buf *buf)
>  {
> @@ -1430,7 +1416,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
>  	 * attached to the hook so we don't waste time allocating
>  	 * memory and locking the socket.
>  	 */
> -	if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
> +	if (__cgroup_bpf_prog_array_is_empty(&cgrp->bpf, CGROUP_SETSOCKOPT))
>  		return 0;
>
>  	/* Allocate a bit more than the initial user buffer for
> @@ -1526,7 +1512,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
>  	 * attached to the hook so we don't waste time allocating
>  	 * memory and locking the socket.
>  	 */
> -	if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
> +	if (__cgroup_bpf_prog_array_is_empty(&cgrp->bpf, CGROUP_GETSOCKOPT))
>  		return retval;
>
>  	ctx.optlen = max_optlen;
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 2405e39d800f..fa76d1d839ad 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1967,18 +1967,10 @@ static struct bpf_prog_dummy {
>  	},
>  };
>
> -/* to avoid allocating empty bpf_prog_array for cgroups that
> - * don't have bpf program attached use one global 'empty_prog_array'
> - * It will not be modified the caller of bpf_prog_array_alloc()
> - * (since caller requested prog_cnt == 0)
> - * that pointer should be 'freed' by bpf_prog_array_free()
> - */
> -static struct {
> -	struct bpf_prog_array hdr;
> -	struct bpf_prog *null_prog;
> -} empty_prog_array = {
> +struct bpf_empty_prog_array bpf_empty_prog_array = {
>  	.null_prog = NULL,
>  };
> +EXPORT_SYMBOL(bpf_empty_prog_array);
>
>  struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
>  {
> @@ -1988,12 +1980,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
>  			       (prog_cnt + 1),
>  			       flags);
>
> -	return &empty_prog_array.hdr;
> +	return &bpf_empty_prog_array.hdr;
>  }
>
>  void bpf_prog_array_free(struct bpf_prog_array *progs)
>  {
> -	if (!progs || progs == &empty_prog_array.hdr)
> +	if (!progs || progs == &bpf_empty_prog_array.hdr)
>  		return;
>  	kfree_rcu(progs, rcu);
>  }
> --
> 2.34.0
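One more note on the new helper: since it only compares the effective
array pointer against the static sentinel and never dereferences it,
rcu_access_pointer() without rcu_read_lock() should be enough here
(unlike the removed helper, which did a full rcu_dereference() under the
lock). Might be worth spelling that out in a comment, e.g. (sketch
only):

	static inline bool
	__cgroup_bpf_prog_array_is_empty(struct cgroup_bpf *cgrp_bpf,
					 enum cgroup_bpf_attach_type type)
	{
		/* No rcu_read_lock() needed: the pointer value is only
		 * compared against the static sentinel, never dereferenced.
		 */
		struct bpf_prog_array *array =
			rcu_access_pointer(cgrp_bpf->effective[type]);

		return array == &bpf_empty_prog_array.hdr;
	}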