netdev - Re: [PATCH v3 bpf-next 2/4] bpf: add layer 2 encap support to bpf_skb_adjust

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <12f9cef4-f5ff-78a4-ec0f-42f737696735@iogearbox.net>
Date:   Thu, 11 Apr 2019 23:08:28 +0200
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Alan Maguire <alan.maguire@...cle.com>, willemb@...gle.com,
        ast@...nel.org, davem@...emloft.net, shuah@...nel.org,
        kafai@...com, songliubraving@...com, yhs@...com,
        quentin.monnet@...ronome.com, john.fastabend@...il.com,
        rdna@...com, linux-kselftest@...r.kernel.org,
        netdev@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH v3 bpf-next 2/4] bpf: add layer 2 encap support to
 bpf_skb_adjust_room

On 04/09/2019 04:06 PM, Alan Maguire wrote:
> commit 868d523535c2 ("bpf: add bpf_skb_adjust_room encap flags")
> introduced support to bpf_skb_adjust_room for GSO-friendly GRE
> and UDP encapsulation.
> 
> For GSO to work for skbs, the inner headers (mac and network) need to
> be marked.  For L3 encapsulation using bpf_skb_adjust_room, the mac
> and network headers are identical.  Here we provide a way of specifying
> the inner mac header length for cases where L2 encap is desired.  Such
> an approach can support encapsulated ethernet headers, MPLS headers etc.
> For example to convert from a packet of form [eth][ip][tcp] to
> [eth][ip][udp][inner mac][ip][tcp], something like the following could
> be done:
> 
> 	headroom = sizeof(iph) + sizeof(struct udphdr) + inner_maclen;
> 
> 	ret = bpf_skb_adjust_room(skb, headroom, BPF_ADJ_ROOM_MAC,
> 				  BPF_F_ADJ_ROOM_ENCAP_L4_UDP |
> 				  BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
> 				  BPF_F_ADJ_ROOM_ENCAP_L2(inner_maclen));
> 
> Signed-off-by: Alan Maguire <alan.maguire@...cle.com>
> ---
>  include/uapi/linux/bpf.h | 10 ++++++++++
>  net/core/filter.c        | 12 ++++++++----
>  2 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 8370245..912391c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1500,6 +1500,10 @@ struct bpf_stack_build_id {
>   *		* **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
>   *		  Use with ENCAP_L3 flags to further specify the tunnel type.
>   *
> + *		* **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
> + *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
> + *		  type; **len** is the length of the inner MAC header.
> + *
>   * 		A call to this helper is susceptible to change the underlaying
>   * 		packet buffer. Therefore, at load time, all checks on pointers
>   * 		previously done by the verifier are invalidated and must be
> @@ -2641,10 +2645,16 @@ enum bpf_func_id {
>  /* BPF_FUNC_skb_adjust_room flags. */
>  #define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
>  
> +#define	BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
> +#define	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56

Would appreciate a small whitespace cleanup in that tab after the define
is replaced with a normal space like in the rest of the file.

>  #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
>  #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
>  #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
>  #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
> +#define	BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
> +					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
> +					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)

Ditto here.

>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 22eb2ed..a1654ef62 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2969,11 +2969,14 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
>  #define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
>  					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>  					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
> -					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
> +					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
> +					 BPF_F_ADJ_ROOM_ENCAP_L2( \
> +					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
>  
>  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  			    u64 flags)
>  {

One thing that it seems missing in this but also in Willem's earlier set
is that we reject these flags in bpf_skb_net_shrink(). Both are called from
bpf_skb_adjust_room(), so if we don't support decap yet we should reject
such flags so we can extend in future. Could you or Willem send a follow-up?
Thanks!

> +	u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
>  	bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
>  	u16 mac_len = 0, inner_net = 0, inner_trans = 0;
>  	unsigned int gso_type = SKB_GSO_DODGY;
> @@ -3008,6 +3011,8 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  
>  		mac_len = skb->network_header - skb->mac_header;
>  		inner_net = skb->network_header;
> +		if (inner_mac_len > len_diff)
> +			return -EINVAL;
>  		inner_trans = skb->transport_header;
>  	}
>  
> @@ -3016,8 +3021,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  		return ret;
>  
>  	if (encap) {
> -		/* inner mac == inner_net on l3 encap */
> -		skb->inner_mac_header = inner_net;
> +		skb->inner_mac_header = inner_net - inner_mac_len;
>  		skb->inner_network_header = inner_net;
>  		skb->inner_transport_header = inner_trans;
>  		skb_set_inner_protocol(skb, skb->protocol);
> @@ -3031,7 +3035,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  			gso_type |= SKB_GSO_GRE;
>  		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
>  			gso_type |= SKB_GSO_IPXIP6;
> -		else
> +		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
>  			gso_type |= SKB_GSO_IPXIP4;
>  
>  		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
>