netdev - Re: [ovs-dev] [PATCH] datapath: Add basic MPLS support to kernel

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <A0E5584A-E05C-4FFF-9CF7-5B78A68FE921@nsn.com>
Date:	Sat, 20 Apr 2013 06:25:03 +0000
From:	"Rajahalme, Jarno (NSN - FI/Espoo)" <jarno.rajahalme@....com>
To:	ext Simon Horman <horms@...ge.net.au>
CC:	"<dev@...nvswitch.org>" <dev@...nvswitch.org>,
	"<netdev@...r.kernel.org>" <netdev@...r.kernel.org>,
	Isaku Yamahata <yamahata@...inux.co.jp>,
	Ravi K <rkerur@...il.com>
Subject: Re: [ovs-dev] [PATCH] datapath: Add basic MPLS support to kernel


On Apr 19, 2013, at 10:41 , ext Simon Horman wrote:

> diff --git a/datapath/actions.c b/datapath/actions.c
> index 0dac658..2c923be 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -38,6 +38,7 @@
> #include "vport.h"
> 
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> +			      unsigned *mpls_stack_depth,
> 			      const struct nlattr *attr, int len, bool keep_skb);
> 
> static int make_writable(struct sk_buff *skb, int write_len)
> @@ -48,6 +49,89 @@ static int make_writable(struct sk_buff *skb, int write_len)
> 	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
> }
> 
> +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
> +{
> +	struct ethhdr *hdr = (struct ethhdr *)skb_mac_header(skb);
> +	if (hdr->h_proto == ethertype)
> +		return;
> +	hdr->h_proto = ethertype;

Will this work properly if the skb has VLAN headers? I recall there was an earlier version that used the l2_size (now mac_len) to locate the actual "h_proto" to update?

> +	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
> +		__be16 diff[] = { ~hdr->h_proto, ethertype };
> +		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
> +					  ~skb->csum);
> +	}
> +}
> +
> +static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls *mpls)
> +{
> +	__be32 *new_mpls_lse;
> +	int err;
> +
> +	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> +	if (unlikely(err))
> +		return err;
> +
> +	skb_push(skb, MPLS_HLEN);
> +	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
> +		skb->mac_len);
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, skb->mac_len);
> +
> +	new_mpls_lse = (__be32 *)skb_network_header(skb);
> +	*new_mpls_lse = mpls->mpls_lse;
> +
> +	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> +		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
> +							     MPLS_HLEN, 0));
> +
> +	set_ethertype(skb, mpls->mpls_ethertype);
> +	return 0;
> +}
> +
> +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> +{
> +	int err;
> +
> +	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> +	if (unlikely(err))
> +		return err;
> +
> +	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> +		skb->csum = csum_sub(skb->csum,
> +				     csum_partial(skb_network_header(skb),
> +						  MPLS_HLEN, 0));
> +
> +	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
> +		skb->mac_len);
> +
> +	skb_pull(skb, MPLS_HLEN);
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, skb->mac_len);
> +
> +	set_ethertype(skb, *ethertype);
> +	return 0;
> +}
> +
> +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
> +{
> +	__be32 *stack = (__be32 *)skb_network_header(skb);
> +	int err;
> +
> +	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
> +	if (unlikely(err))
> +		return err;
> +
> +	if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
> +		__be32 diff[] = { ~(*stack), *mpls_lse };
> +		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
> +					  ~skb->csum);
> +	}
> +
> +	*stack = *mpls_lse;
> +
> +	return 0;
> +}
> +
> /* remove VLAN header from packet and update csum accordingly. */
> static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
> {
> @@ -115,6 +199,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
> 		if (!__vlan_put_tag(skb, current_tag))
> 			return -ENOMEM;
> 
> +		/* update mac_len for MPLS functions */
> +		skb_reset_mac_len(skb);
> +
> 		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
> 			skb->csum = csum_add(skb->csum, csum_partial(skb->data
> 					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
> @@ -352,13 +439,26 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
> 	return 0;
> }
> 
> -static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
> +static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
> +		     unsigned mpls_stack_depth)
> {
> 	struct vport *vport;
> 
> 	if (unlikely(!skb))
> 		return -ENOMEM;
> 
> +	/* The mpls_stack_depth is only non zero if a non-MPLS packet is
> +	 * turned into an MPLS packet via an MPLS push action. In this case
> +	 * the skb may be GSO so update skb->mac_len and skb's
> +	 * network_header to correspond to the bottom of the MPLS label
> +	 * stack rather than the end of the original L2 data which is now
> +	 * the top of the MPLS label stack.  */

It is not clear to me that "bottom of the MPLS label stack" necessarily refers to the start of L3 header, "Bottom of stack" having a special meaning with MPLS.
It might be clearer to state that "during action execution network_header, and mac_len, correspondingly, have tracked the end of the L2 frame (including any VLAN headers), but proper skb output processing (e.g., GSO) requires the network_header (and mac_len) to track the start of the L3 header instead. These differ in the presence of MPLS headers."

> +	if (mpls_stack_depth) {
> +		skb->mac_len += MPLS_HLEN * mpls_stack_depth;
> +		skb_set_network_header(skb, skb->mac_len);
> +		skb_set_encapsulation_features(skb);
> +	}
> +
> 	vport = ovs_vport_rcu(dp, out_port);
> 	if (unlikely(!vport)) {
> 		kfree_skb(skb);
> @@ -398,7 +498,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> }
> 
> static int sample(struct datapath *dp, struct sk_buff *skb,
> -		  const struct nlattr *attr)
> +		  unsigned *mpls_stack_depth, const struct nlattr *attr)
> {
> 	const struct nlattr *acts_list = NULL;
> 	const struct nlattr *a;
> @@ -418,8 +518,9 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
> 		}
> 	}
> 
> -	return do_execute_actions(dp, skb, nla_data(acts_list),
> -				  nla_len(acts_list), true);
> +	return do_execute_actions(dp, skb, mpls_stack_depth,
> +				  nla_data(acts_list), nla_len(acts_list),
> +				  true);
> }
> 
> static int execute_set_action(struct sk_buff *skb,
> @@ -459,13 +560,23 @@ static int execute_set_action(struct sk_buff *skb,
> 	case OVS_KEY_ATTR_UDP:
> 		err = set_udp(skb, nla_data(nested_attr));
> 		break;
> +
> +	case OVS_KEY_ATTR_MPLS:
> +		err = set_mpls(skb, nla_data(nested_attr));
> +		break;
> 	}
> 
> 	return err;
> }
> 
> -/* Execute a list of actions against 'skb'. */
> +/* Execute a list of actions against 'skb'.
> + *
> + * The stack depth is only tracked in the case of a non-MPLS packet
> + * that becomes MPLS via an MPLS push action. The stack depth
> + * is passed to do_output() in order to allow it to prepare the
> + * skb for possible GSO segmentation. */
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> +			unsigned *mpls_stack_depth,
> 			const struct nlattr *attr, int len, bool keep_skb)
> {
> 	/* Every output action needs a separate clone of 'skb', but the common
> @@ -481,7 +592,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> 		int err = 0;
> 
> 		if (prev_port != -1) {
> -			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
> +			do_output(dp, skb_clone(skb, GFP_ATOMIC),
> +				  prev_port, *mpls_stack_depth);
> 			prev_port = -1;
> 		}
> 
> @@ -494,6 +606,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> 			output_userspace(dp, skb, a);
> 			break;
> 
> +		case OVS_ACTION_ATTR_PUSH_MPLS:
> +			err = push_mpls(skb, nla_data(a));
> +			if (!eth_p_mpls(skb->protocol))
> +				(*mpls_stack_depth)++;
> +			break;
> +
> +		case OVS_ACTION_ATTR_POP_MPLS:
> +			err = pop_mpls(skb, nla_data(a));
> +			if (!eth_p_mpls(skb->protocol))
> +				(*mpls_stack_depth)--;
> +			break;
> +

In both cases the stack is changed whenever err == 0, but it is not immediately clear to me whether the '!eth_p_mpls(skb->protocol)' is true if and only if the label stack has changed.

  Jarno

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html