lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6f22e464-21a4-1d1e-7412-080e03328f6c@cumulusnetworks.com>
Date:   Wed, 28 Mar 2018 12:23:41 -0600
From:   David Ahern <dsa@...ulusnetworks.com>
To:     Shrijeet Mukherjee <shrijeetoss@...il.com>, daniel@...earbox.net
Cc:     netdev@...r.kernel.org, roopa@...ulusnetworks.com, ast@...com
Subject: Re: [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for
 EBPF

On 3/28/18 10:27 AM, Shrijeet Mukherjee wrote:

> diff --git a/include/net/mpls.h b/include/net/mpls.h
> index 2583dbc689b8..3a5b8c00823d 100644
> --- a/include/net/mpls.h
> +++ b/include/net/mpls.h
> @@ -24,14 +24,50 @@ struct mpls_shim_hdr {
>  	__be32 label_stack_entry;
>  };
>  
> +struct mpls_entry_decoded {
> +	u32 label;
> +	u8 ttl;
> +	u8 tc;
> +	u8 bos;
> +};
> +
>  static inline bool eth_p_mpls(__be16 eth_type)
>  {
>  	return eth_type == htons(ETH_P_MPLS_UC) ||
>  		eth_type == htons(ETH_P_MPLS_MC);
>  }
>  
> -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
> +static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
>  {
>  	return (struct mpls_shim_hdr *)skb_network_header(skb);
>  }
> +
> +static inline struct mpls_shim_hdr
> +mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
> +{
> +	struct mpls_shim_hdr result;
> +
> +	result.label_stack_entry =
> +		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT)
> +            | (tc << MPLS_LS_TC_SHIFT)
> +            | (bos ? (1 << MPLS_LS_S_SHIFT) : 0)
> +            | (ttl << MPLS_LS_TTL_SHIFT));
> +
> +	return result;
> +}
> +
> +static inline
> +struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
> +{
> +	struct mpls_entry_decoded result;
> +	unsigned int entry = be32_to_cpu(hdr->label_stack_entry);
> +
> +	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
> +	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
> +	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
> +	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
> +
> +	return result;
> +}
> +
>  #endif

The above should be in patch 1, though without the mpls_hdr rename.


> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 18b7c510c511..2278548e1f8f 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -439,6 +439,12 @@ union bpf_attr {
>   * int bpf_skb_vlan_pop(skb)
>   *     Return: 0 on success or negative error
>   *
> + * int bpf_skb_mpls_push(skb, num_lbls, lbls[])
> + *     Return 0 on success or negative error
> + *
> + * int bpf_skb_mpls_pop(skb, num_lbls)
> + *     Return number of popped labels, 0 is no-op, deliver packet to current dst
> + *
>   * int bpf_skb_get_tunnel_key(skb, key, size, flags)
>   * int bpf_skb_set_tunnel_key(skb, key, size, flags)
>   *     retrieve or populate tunnel metadata
> @@ -794,7 +800,10 @@ union bpf_attr {
>  	FN(msg_redirect_map),		\
>  	FN(msg_apply_bytes),		\
>  	FN(msg_cork_bytes),		\
> -	FN(msg_pull_data),
> +	FN(msg_pull_data),		\
> +	FN(skb_mpls_push),		\
> +	FN(skb_mpls_pop),
> +
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 00f62fafc788..c96ae8ef423d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  }
>  
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
> -	   u32, mode, u64, flags)
> +				u32, mode, u64, flags)

drop the above whitespace change


>  {
>  	if (unlikely(flags))
>  		return -EINVAL;
> @@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
>  	.arg4_type	= ARG_ANYTHING,
>  };
>  
> +static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
> +{
> +	u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */
> +	int ret;
> +
> +	ret = skb_cow(skb, len_diff);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_set_inner_protocol(skb, skb->protocol);
> +	skb_reset_inner_network_header(skb);
> +
> +	ret = bpf_skb_generic_push(skb, off, len_diff);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, ETH_HLEN);
> +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
> +
> +	if (skb_is_gso(skb)) {
> +/* Due to header grow, MSS needs to be downgraded. */
> +		skb_shinfo(skb)->gso_size -= len_diff;
> +/* Header must be checked, and gso_segs recomputed. */
> +		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> +		skb_shinfo(skb)->gso_segs = 0;
> +	}
> +
> +	bpf_compute_data_pointers(skb);
> +	return 0;
> +}

The only thing MPLS specific is the protocol setting. Seems to me you
could leverage the existing bpf_skb_net_grow or even bpf_skb_adjust_net
and then set the protocols and headers in the caller of this function.
You already do something similar in bpf_skb_mpls_pop.


> +
> +BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
> +{
> +	u32 i = 0;
> +	struct mpls_shim_hdr *hdr;
> +	unsigned char *cursor;
> +	struct mpls_entry_decoded dec;
> +
> +	if (num_lbls == 0)
> +		return 0;
> +
> +	cursor = skb_network_header(skb);
> +	do {
> +		hdr = (struct mpls_shim_hdr *)cursor;
> +		dec = mpls_entry_decode(hdr);
> +		i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
> +	} while (dec.bos != 1 && i < num_lbls);
> +
> +	bpf_push_mac_rcsum(skb);
> +	skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
> +	skb_reset_network_header(skb);
> +
> +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);

If I pop 1 label and there are more of them, then the protocol does not
need to be adjusted -- it was already set to MPLS.

If I pop all of the labels, then the protocol is not MPLS, but whatever
the inner packet is -- IPv4 or IPv6 or other.


> +	bpf_pull_mac_rcsum(skb);
> +	bpf_compute_data_pointers(skb);
> +
> +	return i;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
> +				.func   = bpf_skb_mpls_pop,
> +				.gpl_only = false,
> +				.ret_type = RET_INTEGER,
> +				.arg1_type  = ARG_PTR_TO_CTX,
> +				.arg2_type  = ARG_ANYTHING,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
> +
> +BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
> +	   __be32*, lbls, u32, num_lbls)
> +{
> +	int ret, i;
> +	unsigned int new_header_size = num_lbls * sizeof(__be32);
> +	unsigned int ttl = 255;
> +	struct dst_entry *dst = skb_dst(skb);

dst is not always set. e.g., any program on ingress prior to the layer 3
will not have it.


> +	struct net_device *out_dev = dst->dev;
> +	struct mpls_shim_hdr *hdr;
> +	bool bos;
> +
> +	/* Ensure there is enough space for the headers in the skb */
> +	ret = bpf_skb_mpls_net_grow(skb, new_header_size);
> +	if (ret < 0) {
> +		trace_printk("COW was killed\n");
> +		bpf_compute_data_pointers(skb);
> +		return -ENOMEM;
> +	}
> +
> +	skb->dev = out_dev;
> +/* XXX this may need finesse to integrate with
> + * global TTL values for MPLS
> + */
> +	if (dst->ops->family == AF_INET)
> +		ttl = ip_hdr(skb)->ttl;
> +	else if (dst->ops->family == AF_INET6)
> +		ttl = ipv6_hdr(skb)->hop_limit;
> +
> +	/* Push the new labels */
> +	hdr = skb_mpls_hdr(skb);
> +	bos = true;
> +	for (i = num_lbls - 1; i >= 0; i--) {
> +		hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
> +		bos = false;
> +	}
> +
> +	bpf_compute_data_pointers(skb);
> +	return 0;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_push_proto = {
> +	.func		= bpf_skb_mpls_push,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
> +
>  static u32 __bpf_skb_min_len(const struct sk_buff *skb)
>  {
>  	u32 min_len = skb_network_offset(skb);
> @@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
>  {
>  	if (func == bpf_skb_vlan_push ||
>  	    func == bpf_skb_vlan_pop ||
> +	    func == bpf_skb_mpls_push ||
> +	    func == bpf_skb_mpls_pop ||
>  	    func == bpf_skb_store_bytes ||
>  	    func == bpf_skb_change_proto ||
>  	    func == bpf_skb_change_head ||
> @@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
>  		return &bpf_skb_vlan_push_proto;
>  	case BPF_FUNC_skb_vlan_pop:
>  		return &bpf_skb_vlan_pop_proto;
> +	case BPF_FUNC_skb_mpls_push:
> +		return &bpf_skb_mpls_push_proto;
> +	case BPF_FUNC_skb_mpls_pop:
> +		return &bpf_skb_mpls_pop_proto;
>  	case BPF_FUNC_skb_change_proto:
>  		return &bpf_skb_change_proto_proto;
>  	case BPF_FUNC_skb_change_type:
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ