[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170814160914.GC27729@dev-rhel7>
Date:   Mon, 14 Aug 2017 12:09:14 -0400
From:   Eric Garver <e@...g.me>
To:     Yi Yang <yi.y.yang@...el.com>
Cc:     netdev@...r.kernel.org, dev@...nvswitch.org, blp@....org,
        jbenc@...hat.com
Subject: Re: [PATCH net-next v2] openvswitch: enable NSH support
On Thu, Aug 10, 2017 at 09:21:15PM +0800, Yi Yang wrote:
> OVS master and 2.8 branch has merged NSH userspace
> patch series, this patch is to enable NSH support
> in kernel data path in order that OVS can support
> NSH in 2.8 release in compat mode by porting this.
> 
> Signed-off-by: Yi Yang <yi.y.yang@...el.com>
> ---
>  drivers/net/vxlan.c              |   7 ++
>  include/net/nsh.h                | 126 ++++++++++++++++++++++++++++++
>  include/uapi/linux/openvswitch.h |  33 ++++++++
>  net/openvswitch/actions.c        | 165 +++++++++++++++++++++++++++++++++++++++
>  net/openvswitch/flow.c           |  41 ++++++++++
>  net/openvswitch/flow.h           |   1 +
>  net/openvswitch/flow_netlink.c   |  54 ++++++++++++-
>  7 files changed, 426 insertions(+), 1 deletion(-)
>  create mode 100644 include/net/nsh.h
Hi Yi,
In general I'd like to echo Jiri's comments on the netlink attributes.
I'd like to see the metadata separate.
I have a few other comments below.
Thanks.
Eric.
[..]
> diff --git a/include/net/nsh.h b/include/net/nsh.h
> new file mode 100644
> index 0000000..96477a1
> --- /dev/null
> +++ b/include/net/nsh.h
> @@ -0,0 +1,126 @@
> +#ifndef __NET_NSH_H
> +#define __NET_NSH_H 1
> +
> +
> +/*
> + * Network Service Header:
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |Ver|O|C|R|R|R|R|R|R|    Length   |   MD Type   |  Next Proto   |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |                Service Path ID                | Service Index |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |                                                               |
> + * ~               Mandatory/Optional Context Header               ~
> + * |                                                               |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * Ver = The version field is used to ensure backward compatibility
> + *       going forward with future NSH updates.  It MUST be set to 0x0
> + *       by the sender, in this first revision of NSH.
> + *
> + * O = OAM. when set to 0x1 indicates that this packet is an operations
> + *     and management (OAM) packet.  The receiving SFF and SFs nodes
> + *     MUST examine the payload and take appropriate action.
> + *
> + * C = context. Indicates that a critical metadata TLV is present.
> + *
> + * Length : total length, in 4-byte words, of NSH including the Base
> + *          Header, the Service Path Header and the optional variable
> + *          TLVs.
> + * MD Type: indicates the format of NSH beyond the mandatory Base Header
> + *          and the Service Path Header.
> + *
> + * Next Protocol: indicates the protocol type of the original packet. A
> + *          new IANA registry will be created for protocol type.
> + *
> + * Service Path Identifier (SPI): identifies a service path.
> + *          Participating nodes MUST use this identifier for Service
> + *          Function Path selection.
> + *
> + * Service Index (SI): provides location within the SFP.
> + *
> + * [0] https://tools.ietf.org/html/draft-ietf-sfc-nsh-13
> + */
> +
> +/**
> + * struct nsh_md1_ctx - Keeps track of NSH context data
> + * @nshc<1-4>: NSH Contexts.
> + */
> +struct nsh_md1_ctx {
> +	__be32 c[4];
> +};
> +
> +struct nsh_md2_tlv {
> +	__be16 md_class;
> +	u8 type;
> +	u8 length;
> +	u8 md_value[];
> +};
> +
> +struct nsh_hdr {
> +	__be16 ver_flags_len;
> +	u8 md_type;
> +	u8 next_proto;
> +	__be32 path_hdr;
> +	union {
> +	    struct nsh_md1_ctx md1;
> +	    struct nsh_md2_tlv md2[0];
> +	};
> +};
> +
> +/* Masking NSH header fields. */
> +#define NSH_VER_MASK       0xc000
> +#define NSH_VER_SHIFT      14
> +#define NSH_FLAGS_MASK     0x3fc0
> +#define NSH_FLAGS_SHIFT    6
> +#define NSH_LEN_MASK       0x003f
> +#define NSH_LEN_SHIFT      0
> +
> +#define NSH_SPI_MASK       0xffffff00
> +#define NSH_SPI_SHIFT      8
> +#define NSH_SI_MASK        0x000000ff
> +#define NSH_SI_SHIFT       0
> +
> +#define NSH_DST_PORT    4790     /* UDP Port for NSH on VXLAN. */
> +#define ETH_P_NSH       0x894F   /* Ethertype for NSH. */
> +
> +/* NSH Base Header Next Protocol. */
> +#define NSH_P_IPV4        0x01
> +#define NSH_P_IPV6        0x02
> +#define NSH_P_ETHERNET    0x03
> +#define NSH_P_NSH         0x04
> +#define NSH_P_MPLS        0x05
> +
> +/* MD Type Registry. */
> +#define NSH_M_TYPE1     0x01
> +#define NSH_M_TYPE2     0x02
> +#define NSH_M_EXP1      0xFE
> +#define NSH_M_EXP2      0xFF
> +
> +/* NSH Metadata Length. */
> +#define NSH_M_TYPE1_MDLEN 16
> +
> +/* NSH Base Header Length */
> +#define NSH_BASE_HDR_LEN  8
> +
> +/* NSH MD Type 1 header Length. */
> +#define NSH_M_TYPE1_LEN   24
> +
> +static inline u16
> +nsh_hdr_len(const struct nsh_hdr *nsh)
> +{
> +	return 4 * (ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
This is doing the multiplication before the shift. It works only because
the shift is 0.
> +}
> +
> +static inline struct nsh_md1_ctx *
> +nsh_md1_ctx(struct nsh_hdr *nsh)
> +{
> +	return &nsh->md1;
> +}
> +
> +static inline struct nsh_md2_tlv *
> +nsh_md2_ctx(struct nsh_hdr *nsh)
> +{
> +	return nsh->md2;
> +}
> +
> +#endif /* __NET_NSH_H */
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 156ee4c..5a1c94b 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -333,6 +333,7 @@ enum ovs_key_attr {
>  	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
> +	OVS_KEY_ATTR_NSH,                  /* struct ovs_key_nsh */
>  
>  #ifdef __KERNEL__
>  	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
> @@ -491,6 +492,15 @@ struct ovs_key_ct_tuple_ipv6 {
>  	__u8   ipv6_proto;
>  };
>  
> +struct ovs_key_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 np;
> +	__u8 pad;
> +	__be32 path_hdr;
> +	__be32 c[4];
> +};
> +
>  /**
>   * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
>   * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
> @@ -769,6 +779,25 @@ struct ovs_action_push_eth {
>  	struct ovs_key_ethernet addresses;
>  };
>  
> +#define OVS_PUSH_NSH_MAX_MD_LEN 248
> +/*
> + * struct ovs_action_push_nsh - %OVS_ACTION_ATTR_PUSH_NSH
> + * @flags: NSH header flags.
> + * @mdtype: NSH metadata type.
> + * @mdlen: Length of NSH metadata in bytes.
> + * @np: NSH next_protocol: Inner packet type.
> + * @path_hdr: NSH service path id and service index.
> + * @metadata: NSH metadata for MD type 1 or 2
> + */
> +struct ovs_action_push_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 mdlen;
> +	__u8 np;
> +	__be32 path_hdr;
> +	__u8 metadata[];
> +};
> +
>  /**
>   * enum ovs_action_attr - Action types.
>   *
> @@ -806,6 +835,8 @@ struct ovs_action_push_eth {
>   * packet.
>   * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
>   * packet.
> + * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
> + * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
>   *
>   * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
>   * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
> @@ -835,6 +866,8 @@ enum ovs_action_attr {
>  	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
>  	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
>  	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
> +	OVS_ACTION_ATTR_PUSH_NSH,     /* struct ovs_action_push_nsh. */
> +	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
>  
>  	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
>  				       * from userspace. */
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index e461067..50f80bc 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -38,6 +38,7 @@
>  #include <net/dsfield.h>
>  #include <net/mpls.h>
>  #include <net/sctp/checksum.h>
> +#include <net/nsh.h>
>  
>  #include "datapath.h"
>  #include "flow.h"
> @@ -380,6 +381,114 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
>  	return 0;
>  }
>  
> +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
> +		    const struct ovs_action_push_nsh *oapn)
> +{
> +	struct nsh_hdr *nsh;
> +	size_t length = NSH_BASE_HDR_LEN + oapn->mdlen;
> +	u8 next_proto;
> +
> +	if (key->mac_proto == MAC_PROTO_ETHERNET) {
> +		next_proto = NSH_P_ETHERNET;
> +	} else {
> +		switch (ntohs(skb->protocol)) {
> +		case ETH_P_IP:
> +			next_proto = NSH_P_IPV4;
> +			break;
> +		case ETH_P_IPV6:
> +			next_proto = NSH_P_IPV6;
> +			break;
> +		case ETH_P_NSH:
> +			next_proto = NSH_P_NSH;
> +			break;
> +		default:
> +			return -ENOTSUPP;
> +		}
> +	}
> +
>
I believe you need to validate that oapn->mdlen is a multiple of 4.
> +	/* Add the NSH header */
> +	if (skb_cow_head(skb, length) < 0)
> +		return -ENOMEM;
> +
> +	skb_push(skb, length);
> +	nsh = (struct nsh_hdr *)(skb->data);
> +	nsh->ver_flags_len = htons((oapn->flags << NSH_FLAGS_SHIFT) |
> +				 (length >> 2));
> +	nsh->next_proto = next_proto;
> +	nsh->path_hdr = oapn->path_hdr;
> +	nsh->md_type = oapn->mdtype;
> +	switch (nsh->md_type) {
> +	case NSH_M_TYPE1:
> +		nsh->md1 = *(struct nsh_md1_ctx *)oapn->metadata;
> +		break;
> +	case NSH_M_TYPE2: {
> +		/* The MD2 metadata in oapn is already padded to 4 bytes. */
> +		size_t len = DIV_ROUND_UP(oapn->mdlen, 4) * 4;
> +
> +		memcpy(nsh->md2, oapn->metadata, len);
I don't see any validation of oapn->mdlen. Normally this happens in
__ovs_nla_copy_actions(). It will be made easier if you add a separate
MD attribute as Jiri has suggested.
> +		break;
> +	}
> +	default:
> +		return -ENOTSUPP;
> +	}
> +
> +	if (!skb->inner_protocol)
> +		skb_set_inner_protocol(skb, skb->protocol);
> +
> +	skb->protocol = htons(ETH_P_NSH);
> +	key->eth.type = htons(ETH_P_NSH);
> +	skb_reset_mac_header(skb);
> +	skb_reset_mac_len(skb);
> +
> +	/* safe right before invalidate_flow_key */
> +	key->mac_proto = MAC_PROTO_NONE;
> +	invalidate_flow_key(key);
> +	return 0;
> +}
> +
[..]
Powered by blists - more mailing lists
 
