lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 11 Feb 2016 10:35:14 +0000
From:	Brian Russell <brussell@...cade.com>
To:	<netdev@...r.kernel.org>
Subject: [PATCH net-next 2/2] vxlan: support GPE/NSH

Support the Generic Protocol Extension (GPE) to VxLAN, which extends VxLAN
to allow multi-protocol encapsulation. With GPE, IPv4, IPv6, MPLS unicast
and NSH encapsulated packets can be sent and received in addition to
Ethernet frames. The extension is defined in:

https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01

Signed-off-by: Brian Russell <brussell@...cade.com>
---
 drivers/net/vxlan.c          | 139 +++++++++++++++++++++++++++++++++++++++----
 include/net/vxlan.h          |  40 ++++++++++++-
 include/uapi/linux/if_link.h |   1 +
 3 files changed, 166 insertions(+), 14 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 65f5247..92a4cdc 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -50,6 +50,7 @@
 #include <net/ip6_checksum.h>
 #endif
 #include <net/dst_metadata.h>
+#include <net/nsh.h>
 
 #define VXLAN_VERSION	"0.1"
 
@@ -1168,14 +1169,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
 	if (!vxlan)
 		goto drop;
 
-	skb_reset_mac_header(skb);
 	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
-	skb->protocol = eth_type_trans(skb, vxlan->dev);
-	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
-	/* Ignore packet loops (and multicast echo) */
-	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
-		goto drop;
 
 	/* Get data from the outer IP header */
 	if (vxlan_get_sk_family(vs) == AF_INET) {
@@ -1195,13 +1189,57 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
 		tun_dst = NULL;
 	}
 
+	switch (md->gpe_np) {
+	case VXLAN_GPE_NP_IPv4:
+		skb->protocol = htons(ETH_P_IP);
+		goto skip_l2;
+#if IS_ENABLED(CONFIG_IPV6)
+	case VXLAN_GPE_NP_IPv6:
+		skb->protocol = htons(ETH_P_IPV6);
+		goto skip_l2;
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+	case VXLAN_GPE_NP_MPLS:
+		skb->protocol = htons(ETH_P_MPLS_UC);
+		goto skip_l2;
+#endif
+#if IS_ENABLED(CONFIG_NET_NSH)
+	case VXLAN_GPE_NP_NSH:
+		{
+			u_char next_proto;
+
+			if (nsh_decap(skb, NULL, NULL, &next_proto) < 0)
+				goto drop;
+
+			if (next_proto != NSH_NEXT_PROTO_ETH)
+				goto skip_l2;
+		}
+		break;
+#endif
+	case VXLAN_GPE_NP_ETH:
+		/* GPE with next proto eth is equivalent to vanilla vxlan. */
+	default:
+		break;
+	}
+
+	skb_reset_mac_header(skb);
+	skb->protocol = eth_type_trans(skb, vxlan->dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+	/* Ignore packet loops (and multicast echo) */
+	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
+		goto drop;
+
 	if ((vxlan->flags & VXLAN_F_LEARN) &&
 	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
 		goto drop;
 
+skip_l2:
 	skb_reset_network_header(skb);
+
 	/* In flow-based mode, GBP is carried in dst_metadata */
-	if (!(vs->flags & VXLAN_F_COLLECT_METADATA))
+	if (!(vs->flags & VXLAN_F_COLLECT_METADATA) &&
+	    !(vs->flags & VXLAN_F_GPE))
 		skb->mark = md->gbp;
 
 	if (oip6)
@@ -1252,6 +1290,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct vxlan_metadata _md;
 	struct vxlan_metadata *md = &_md;
 
+	vs = rcu_dereference_sk_user_data(sk);
+	if (!vs)
+		goto drop;
+
 	/* Need Vxlan and inner Ethernet header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
 		goto error;
@@ -1267,14 +1309,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto bad_flags;
 	}
 
-	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+	/* If GPE, protocol will be set once next proto examined. */
+	if (iptunnel_pull_header(skb, VXLAN_HLEN,
+				 vs->flags & VXLAN_F_GPE ?
+				 htons(ETH_P_IP) : htons(ETH_P_TEB)))
 		goto drop;
 	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
-	vs = rcu_dereference_sk_user_data(sk);
-	if (!vs)
-		goto drop;
-
 	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
 		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
 				    !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
@@ -1318,6 +1359,16 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		flags &= ~VXLAN_GBP_USED_BITS;
 	}
 
+	if (vs->flags & VXLAN_F_GPE) {
+		/* Next protocol is required */
+		if (!(flags & VXLAN_HF_GPE_NP))
+			goto bad_flags;
+
+		md->gpe_np = flags & VXLAN_GPE_NP_MASK;
+
+		flags &= ~VXLAN_GPE_USED_BITS;
+	}
+
 	if (flags || vni & ~VXLAN_VNI_MASK) {
 		/* If there are any unprocessed flags remaining treat
 		 * this as a malformed packet. This behavior diverges from
@@ -1664,6 +1715,37 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
+static void vxlan_build_gpe_hdr(struct vxlanhdr *vxh, __be16 proto)
+{
+	u32 next_proto;
+
+	switch (proto) {
+#if IS_ENABLED(CONFIG_NET_NSH)
+	case htons(ETH_P_NSH):
+		next_proto = VXLAN_GPE_NP_NSH;
+		break;
+#endif
+	case htons(ETH_P_IP):
+		next_proto = VXLAN_GPE_NP_IPv4;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		next_proto = VXLAN_GPE_NP_IPv6;
+		break;
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+	case htons(ETH_P_MPLS_UC):
+		next_proto = VXLAN_GPE_NP_MPLS;
+		break;
+#endif
+	default:
+		next_proto = VXLAN_GPE_NP_ETH;
+		break;
+	}
+
+	vxh->vx_flags |= htonl(VXLAN_HF_GPE_NP | next_proto);
+}
+
 static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
 				struct vxlan_metadata *md)
 {
@@ -1750,6 +1832,9 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	if (vxflags & VXLAN_F_GBP)
 		vxlan_build_gbp_hdr(vxh, vxflags, md);
 
+	if (vxflags & VXLAN_F_GPE)
+		vxlan_build_gpe_hdr(vxh, skb->protocol);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 	return 0;
 }
@@ -2073,6 +2158,26 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct vxlan_rdst *rdst, *fdst = NULL;
 	struct vxlan_fdb *f;
 
+	if (vxlan->flags & VXLAN_F_GPE) {
+		switch (skb->protocol) {
+#if IS_ENABLED(CONFIG_NET_NSH)
+		case htons(ETH_P_NSH):
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+		case htons(ETH_P_IPV6):
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+		case htons(ETH_P_MPLS_UC):
+#endif
+		case htons(ETH_P_IP):
+			vxlan_xmit_one(skb, dev, &vxlan->default_dst, false);
+			return NETDEV_TX_OK;
+		default:
+			/* Assume L2 and look for FDB entry */
+			break;
+		}
+	}
+
 	info = skb_tunnel_info(skb);
 
 	skb_reset_mac_header(skb);
@@ -2475,6 +2580,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
+	[IFLA_VXLAN_GPE]	= { .type = NLA_FLAG, },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2895,6 +3001,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
 		conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
 
+	if (data[IFLA_VXLAN_GPE])
+		conf.flags |= VXLAN_F_GPE;
+
 	err = vxlan_dev_configure(src_net, dev, &conf);
 	switch (err) {
 	case -ENODEV:
@@ -3037,6 +3146,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_GPE &&
+	    nla_put_flag(skb, IFLA_VXLAN_GPE))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 25bd919..7886296 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -121,8 +121,44 @@ struct vxlanhdr_gbp {
 
 struct vxlan_metadata {
 	u32		gbp;
+	u8              gpe_np;
 };
 
+/*
+ * VXLAN Generic Protocol Extension:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver            Version, initially 0
+ * I = 1	  VXLAN Network Identifier (VNI) present
+ * P = 1          Next Protocol field is present
+ * O = 1          OAM
+ * Next Protocol  Indicates the protocol header immediately following
+ *                the VXLAN GPE header.
+ *
+ * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
+ *
+ * Use struct vxlanhdr above with some extra defines:
+ */
+#define VXLAN_HF_GPE_OAM BIT(25) /* GPE OAM bit */
+#define VXLAN_HF_GPE_NP  BIT(26) /* GPE protocol bit */
+
+#define VXLAN_GPE_NP_MASK (0xFF)
+
+#define VXLAN_GPE_NP_IPv4 0x1
+#define VXLAN_GPE_NP_IPv6 0x2
+#define VXLAN_GPE_NP_ETH  0x3
+#define VXLAN_GPE_NP_NSH  0x4
+#define VXLAN_GPE_NP_MPLS  0x5
+
+#define VXLAN_GPE_USED_BITS (VXLAN_HF_GPE_NP  | \
+			     VXLAN_HF_GPE_OAM | \
+			     VXLAN_GPE_NP_MASK)
+
+
 /* per UDP socket information */
 struct vxlan_sock {
 	struct hlist_node hlist;
@@ -204,6 +240,7 @@ struct vxlan_dev {
 #define VXLAN_F_GBP			0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 #define VXLAN_F_COLLECT_METADATA	0x2000
+#define VXLAN_F_GPE			0x4000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -212,7 +249,8 @@ struct vxlan_dev {
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
 					 VXLAN_F_REMCSUM_RX |		\
 					 VXLAN_F_REMCSUM_NOPARTIAL |	\
-					 VXLAN_F_COLLECT_METADATA)
+					 VXLAN_F_COLLECT_METADATA |     \
+					 VXLAN_F_GPE)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d452cea..e8d74a5 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -456,6 +456,7 @@ enum {
 	IFLA_VXLAN_GBP,
 	IFLA_VXLAN_REMCSUM_NOPARTIAL,
 	IFLA_VXLAN_COLLECT_METADATA,
+	IFLA_VXLAN_GPE,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
2.1.4

Powered by blists - more mailing lists