Message-ID: <20171018173650.wjzuqhckabelxv65@ast-mbp>
Date: Wed, 18 Oct 2017 10:36:52 -0700
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: John Fastabend <john.r.fastabend@...il.com>
Cc: davem@...emloft.net, netdev@...r.kernel.org,
borkmann@...earbox.net, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [net PATCH 2/5] bpf: avoid preempt enable/disable in sockmap
using tcp_skb_cb region
On Wed, Oct 18, 2017 at 07:10:36AM -0700, John Fastabend wrote:
> From: John Fastabend <john.fastabend@...il.com>
>
> SK_SKB BPF programs are run from the socket/tcp context, but early
> enough in the stack that most of the TCP metadata kept in tcp_skb_cb
> is not yet needed. So we can reuse some of those fields to carry the
> BPF metadata needed for SK_SKB programs when implementing the
> redirect function.
>
> This allows us to drop the preempt disable logic around the verdict
> handling. It does, however, require an API change: sk_redirect_map()
> has been updated to additionally take the skb context pointer. Note
> that we do still disable/enable preemption around the actual BPF
> program run to account for map updates.
>
> Signed-off-by: John Fastabend <john.fastabend@...il.com>
> Acked-by: Daniel Borkmann <daniel@...earbox.net>
Acked-by: Alexei Starovoitov <ast@...nel.org>
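
To spell out the API change for anyone carrying out-of-tree SK_SKB
programs: the helper now takes the program's skb context as its first
argument. Untested sketch, with made-up program and map names:

	SEC("sk_skb")
	int verdict_prog(struct __sk_buff *skb)
	{
		__u32 idx = 0;	/* hypothetical sockmap index */

		/* skb ctx goes first now; the kernel stashes the
		 * redirect target in the skb's cb instead of per-cpu
		 * data (my_sock_map is a placeholder name)
		 */
		return bpf_sk_redirect_map(skb, &my_sock_map, idx, 0);
	}
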
> ---
> include/linux/filter.h | 2 +
> include/net/tcp.h | 5 +++
> kernel/bpf/sockmap.c | 19 ++++++-------
> net/core/filter.c | 29 ++++++++++----------
> samples/sockmap/sockmap_kern.c | 2 +
> tools/include/uapi/linux/bpf.h | 3 +-
> tools/testing/selftests/bpf/bpf_helpers.h | 2 +
> tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +--
> 8 files changed, 36 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index d29e58f..818a0b2 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
> void bpf_warn_invalid_xdp_action(u32 act);
> void bpf_warn_invalid_xdp_redirect(u32 ifindex);
>
> -struct sock *do_sk_redirect_map(void);
> +struct sock *do_sk_redirect_map(struct sk_buff *skb);
>
> #ifdef CONFIG_BPF_JIT
> extern int bpf_jit_enable;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 89974c5..b1ef98e 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -840,6 +840,11 @@ struct tcp_skb_cb {
> struct inet6_skb_parm h6;
> #endif
> } header; /* For incoming skbs */
> + struct {
> + __u32 key;
> + __u32 flags;
> + struct bpf_map *map;
> + } bpf;
I think it should be ok. Cc'ing Eric for visibility.
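Sizing-wise this should also be caught at build time: if I remember
right, tcp_init() has a check along these lines, so if this bpf struct
ever pushed tcp_skb_cb past skb->cb[] the kernel would stop compiling
(paraphrased, not the exact source):

	/* the whole tcp_skb_cb, including the new bpf member, must
	 * still fit in the 48-byte skb->cb[] scratch area
	 */
	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

And since the new struct sits in a union with the inet header parms,
it likely doesn't grow tcp_skb_cb at all.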
> };
> };
>
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index c68899d..beaabb2 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -39,6 +39,7 @@
> #include <linux/workqueue.h>
> #include <linux/list.h>
> #include <net/strparser.h>
> +#include <net/tcp.h>
>
> struct bpf_stab {
> struct bpf_map map;
> @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
> return SK_DROP;
>
> skb_orphan(skb);
> + /* We need to ensure that BPF metadata for maps is also cleared
> + * when we orphan the skb so that we don't have the possibility
> + * to reference a stale map.
> + */
> + TCP_SKB_CB(skb)->bpf.map = NULL;
> skb->sk = psock->sock;
> bpf_compute_data_end(skb);
> + preempt_disable();
> rc = (*prog->bpf_func)(skb, prog->insnsi);
> + preempt_enable();
> skb->sk = NULL;
>
> return rc;
> @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
> struct sock *sk;
> int rc;
>
> - /* Because we use per cpu values to feed input from sock redirect
> - * in BPF program to do_sk_redirect_map() call we need to ensure we
> - * are not preempted. RCU read lock is not sufficient in this case
> - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
> - */
> - preempt_disable();
> rc = smap_verdict_func(psock, skb);
> switch (rc) {
> case SK_REDIRECT:
> - sk = do_sk_redirect_map();
> - preempt_enable();
> + sk = do_sk_redirect_map(skb);
> if (likely(sk)) {
> struct smap_psock *peer = smap_psock_sk(sk);
>
> @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
> /* Fall through and free skb otherwise */
> case SK_DROP:
> default:
> - if (rc != SK_REDIRECT)
> - preempt_enable();
> kfree_skb(skb);
> }
> }
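
Nice simplification. For anyone following along: the old wide
preempt-disable region was needed because the helper and the lookup
communicated through per-cpu scratch, roughly (pre-patch flow,
paraphrased from the removed lines above):

	/* pre-patch: bpf_sk_redirect_map() wrote the target into
	 * this_cpu redirect_info, so the program run and the lookup
	 * had to stay on the same CPU without being preempted
	 */
	preempt_disable();
	rc = smap_verdict_func(psock, skb);
	if (rc == SK_REDIRECT)
		sk = do_sk_redirect_map();	/* reads this_cpu state */
	preempt_enable();

With the target stored in TCP_SKB_CB(skb)->bpf it travels with the skb
itself, so being preempted between the verdict and the lookup is
harmless.
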
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 74b8c91..ca1ba0b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
> .arg2_type = ARG_ANYTHING,
> };
>
> -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
> +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
> + struct bpf_map *, map, u32, key, u64, flags)
> {
> - struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>
> if (unlikely(flags))
> return SK_ABORTED;
>
> - ri->ifindex = key;
> - ri->flags = flags;
> - ri->map = map;
> + tcb->bpf.key = key;
> + tcb->bpf.flags = flags;
> + tcb->bpf.map = map;
>
> return SK_REDIRECT;
> }
>
> -struct sock *do_sk_redirect_map(void)
> +struct sock *do_sk_redirect_map(struct sk_buff *skb)
> {
> - struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
> struct sock *sk = NULL;
>
> - if (ri->map) {
> - sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
> + if (tcb->bpf.map) {
> + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
>
> - ri->ifindex = 0;
> - ri->map = NULL;
> - /* we do not clear flags for future lookup */
> + tcb->bpf.key = 0;
> + tcb->bpf.map = NULL;
> }
>