Message-ID: <20171018173650.wjzuqhckabelxv65@ast-mbp>
Date:   Wed, 18 Oct 2017 10:36:52 -0700
From:   Alexei Starovoitov <alexei.starovoitov@...il.com>
To:     John Fastabend <john.r.fastabend@...il.com>
Cc:     davem@...emloft.net, netdev@...r.kernel.org,
        borkmann@...earbox.net, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [net PATCH 2/5] bpf: avoid preempt enable/disable in sockmap
 using tcp_skb_cb region

On Wed, Oct 18, 2017 at 07:10:36AM -0700, John Fastabend wrote:
> From: John Fastabend <john.fastabend@...il.com>
> 
> SK_SKB BPF programs run in the socket/TCP context, but early enough in
> the stack that most of the TCP metadata in tcp_skb_cb is not yet
> needed. We can therefore reuse some of its currently unused fields to
> carry the BPF metadata that SK_SKB programs need when implementing the
> redirect function.
> 
> This allows us to drop the preempt disable logic around the redirect
> lookup. It does, however, require an API change: sk_redirect_map() has
> been updated to additionally take the skb ctx pointer. Note that we
> still disable/enable preemption around the actual BPF program run, to
> account for map updates.
> 
> Signed-off-by: John Fastabend <john.fastabend@...il.com>
> Acked-by: Daniel Borkmann <daniel@...earbox.net>

Acked-by: Alexei Starovoitov <ast@...nel.org>
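
For anyone carrying out-of-tree SK_SKB programs: after this change the
helper takes the skb ctx as its first argument. A minimal sketch of a
verdict program against the new signature (map definition, key, and
section name below are illustrative, not taken from this patch):

    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"

    /* Illustrative sockmap; key/value sizes follow the usual
     * BPF_MAP_TYPE_SOCKMAP convention (int -> int).
     */
    struct bpf_map_def SEC("maps") sock_map = {
            .type           = BPF_MAP_TYPE_SOCKMAP,
            .key_size       = sizeof(int),
            .value_size     = sizeof(int),
            .max_entries    = 20,
    };

    SEC("sk_skb2")
    int bpf_prog_verdict(struct __sk_buff *skb)
    {
            /* The skb ctx goes first now; the redirect target is
             * recorded in tcp_skb_cb rather than per-CPU scratch.
             */
            return bpf_sk_redirect_map(skb, &sock_map, 0, 0);
    }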

> ---
>  include/linux/filter.h                             |    2 +
>  include/net/tcp.h                                  |    5 +++
>  kernel/bpf/sockmap.c                               |   19 ++++++-------
>  net/core/filter.c                                  |   29 ++++++++++----------
>  samples/sockmap/sockmap_kern.c                     |    2 +
>  tools/include/uapi/linux/bpf.h                     |    3 +-
>  tools/testing/selftests/bpf/bpf_helpers.h          |    2 +
>  tools/testing/selftests/bpf/sockmap_verdict_prog.c |    4 +--
>  8 files changed, 36 insertions(+), 30 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index d29e58f..818a0b2 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
>  void bpf_warn_invalid_xdp_action(u32 act);
>  void bpf_warn_invalid_xdp_redirect(u32 ifindex);
>  
> -struct sock *do_sk_redirect_map(void);
> +struct sock *do_sk_redirect_map(struct sk_buff *skb);
>  
>  #ifdef CONFIG_BPF_JIT
>  extern int bpf_jit_enable;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 89974c5..b1ef98e 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -840,6 +840,11 @@ struct tcp_skb_cb {
>  			struct inet6_skb_parm	h6;
>  #endif
>  		} header;	/* For incoming skbs */
> +		struct {
> +			__u32 key;
> +			__u32 flags;
> +			struct bpf_map *map;
> +		} bpf;

I think it should be ok. cc Eric for visibility.
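
For reference, the new member (two u32s plus a map pointer) sits in the
same union as the inet{,6} header parms, so it should not grow
tcp_skb_cb. If it ever did, the existing build-time check in tcp_init()
(net/ipv4/tcp.c) would catch it at compile time; roughly:

    void __init tcp_init(void)
    {
            /* skb->cb[] is 48 bytes; fail the build if tcp_skb_cb,
             * including the new bpf member, outgrows it.
             */
            BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
                         FIELD_SIZEOF(struct sk_buff, cb));
            /* ... rest of tcp_init() ... */
    }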

>  	};
>  };
>  
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index c68899d..beaabb2 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -39,6 +39,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/list.h>
>  #include <net/strparser.h>
> +#include <net/tcp.h>
>  
>  struct bpf_stab {
>  	struct bpf_map map;
> @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
>  		return SK_DROP;
>  
>  	skb_orphan(skb);
> +	/* We need to ensure that BPF metadata for maps is also cleared
> +	 * when we orphan the skb so that we don't have the possibility
> +	 * to reference a stale map.
> +	 */
> +	TCP_SKB_CB(skb)->bpf.map = NULL;
>  	skb->sk = psock->sock;
>  	bpf_compute_data_end(skb);
> +	preempt_disable();
>  	rc = (*prog->bpf_func)(skb, prog->insnsi);
> +	preempt_enable();
>  	skb->sk = NULL;
>  
>  	return rc;
> @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
>  	struct sock *sk;
>  	int rc;
>  
> -	/* Because we use per cpu values to feed input from sock redirect
> -	 * in BPF program to do_sk_redirect_map() call we need to ensure we
> -	 * are not preempted. RCU read lock is not sufficient in this case
> -	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
> -	 */
> -	preempt_disable();
>  	rc = smap_verdict_func(psock, skb);
>  	switch (rc) {
>  	case SK_REDIRECT:
> -		sk = do_sk_redirect_map();
> -		preempt_enable();
> +		sk = do_sk_redirect_map(skb);
>  		if (likely(sk)) {
>  			struct smap_psock *peer = smap_psock_sk(sk);
>  
> @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
>  	/* Fall through and free skb otherwise */
>  	case SK_DROP:
>  	default:
> -		if (rc != SK_REDIRECT)
> -			preempt_enable();
>  		kfree_skb(skb);
>  	}
>  }
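
To spell out why the old scheme needed the wider critical section: the
helper wrote its result into per-CPU redirect_info, which was only read
back after the program returned. An illustrative sketch of the hazard
the removed preempt_disable/enable pair guarded against (this is the
old flow, not the patched code):

    rc = (*prog->bpf_func)(skb, prog->insnsi); /* helper writes
                                                * this_cpu redirect_info
                                                */
    /* If preemption were allowed here, another task could run a
     * verdict program on this CPU and clobber redirect_info before
     * the lookup below.
     */
    sk = do_sk_redirect_map();                 /* reads redirect_info */

With the metadata in TCP_SKB_CB(skb)->bpf the state travels with the
skb itself, so only the program invocation needs preemption disabled
(for the map update side), as done in smap_verdict_func() above.
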
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 74b8c91..ca1ba0b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
>  	.arg2_type      = ARG_ANYTHING,
>  };
>  
> -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
> +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
> +	   struct bpf_map *, map, u32, key, u64, flags)
>  {
> -	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> +	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>  
>  	if (unlikely(flags))
>  		return SK_ABORTED;
>  
> -	ri->ifindex = key;
> -	ri->flags = flags;
> -	ri->map = map;
> +	tcb->bpf.key = key;
> +	tcb->bpf.flags = flags;
> +	tcb->bpf.map = map;
>  
>  	return SK_REDIRECT;
>  }
>  
> -struct sock *do_sk_redirect_map(void)
> +struct sock *do_sk_redirect_map(struct sk_buff *skb)
>  {
> -	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
> +	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>  	struct sock *sk = NULL;
>  
> -	if (ri->map) {
> -		sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
> +	if (tcb->bpf.map) {
> +		sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
>  
> -		ri->ifindex = 0;
> -		ri->map = NULL;
> -		/* we do not clear flags for future lookup */
> +		tcb->bpf.key = 0;
> +		tcb->bpf.map = NULL;
>  	}
>  
