lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <1433341306-29288-2-git-send-email-roopa@cumulusnetworks.com>
Date:	Wed,  3 Jun 2015 07:21:44 -0700
From:	Roopa Prabhu <roopa@...ulusnetworks.com>
To:	ebiederm@...ssion.com, rshearma@...cade.com, tgraf@...g.ch
Cc:	netdev@...r.kernel.org
Subject: [PATCH WIP RFC 1/3] lwtunnels: basic infra for light weight tunnels like mpls

From: Roopa Prabhu <roopa@...ulusnetworks.com>

Still a work in progress and not fully functional.
In some places I am still going back and forth on a
few things.

I started out thinking the tunnel hdr could be shared
and refcounted. It's somewhere midway now.

Signed-off-by: Roopa Prabhu <roopa@...ulusnetworks.com>
---
 include/linux/if_lwtunnel.h      |    8 ++
 include/net/lwtunnel.h           |   61 +++++++++++++
 include/uapi/linux/if_lwtunnel.h |   12 +++
 include/uapi/linux/rtnetlink.h   |    8 +-
 net/Makefile                     |    2 +-
 net/lwtunnel.c                   |  177 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 266 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/if_lwtunnel.h
 create mode 100644 include/net/lwtunnel.h
 create mode 100644 include/uapi/linux/if_lwtunnel.h
 create mode 100644 net/lwtunnel.c

diff --git a/include/linux/if_lwtunnel.h b/include/linux/if_lwtunnel.h
new file mode 100644
index 0000000..5fa7ca2
--- /dev/null
+++ b/include/linux/if_lwtunnel.h
@@ -0,0 +1,8 @@
+#ifndef _IF_LWTUNNEL_H_
+#define _IF_LWTUNNEL_H_
+
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <uapi/linux/if_lwtunnel.h>
+
+#endif /* _IF_LWTUNNEL_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
new file mode 100644
index 0000000..3964f15
--- /dev/null
+++ b/include/net/lwtunnel.h
@@ -0,0 +1,61 @@
+#ifndef __NET_LW_TUNNELS_H
+#define __NET_LW_TUNNELS_H 1
+
+#include <linux/if_lwtunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/u64_stats_sync.h>
+#include <net/dsfield.h>
+#include <net/gro_cells.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#define LW_TNL_HASH_BITS   7
+#define LW_TNL_HASH_SIZE   (1 << LW_TNL_HASH_BITS)
+
+struct lwtunnel_hdr { /* length-prefixed blob embedded at the end of struct lwtunnel_state */
+	int             len; /* value reported by lwtunnel_encap_size() */
+	__u8            data[0]; /* trailing bytes; lwtunnel_state_alloc() reserves hdr_len bytes here */
+};
+
+struct lwtunnel_state { /* per-tunnel state; allocated by lwtunnel_state_alloc() */
+	__u16			type; /* used as the index into lwtun_encaps[] */
+	atomic_t          refcnt; /* NOTE(review): not touched by the code in this patch */
+	spinlock_t        lock; /* NOTE(review): not touched by the code in this patch */
+	struct lwtunnel_hdr tunnel; /* must stay last: ends in a variable-length array */
+};
+
+struct lwtunnel_net { /* NOTE(review): not referenced by net/lwtunnel.c in this patch */
+	struct hlist_head tunnels[LW_TNL_HASH_SIZE]; /* LW_TNL_HASH_SIZE (1 << LW_TNL_HASH_BITS) list heads */
+};
+
+struct lwtunnel_encap_ops { /* per-encap-type callbacks, registered in lwtun_encaps[] */
+	size_t (*encap_hlen)(struct rtencap *e); /* NOTE(review): never called in this patch */
+	int (*build_state)(struct rtencap *e, int len, struct lwtunnel_state **ts); /* invoked by lwtunnel_build_state() */
+	int (*output)(struct sock *sk, struct sk_buff *skb); /* installed as dst->output by lwtunnel_dst_set_output() */
+	int (*dump_encap_hdr)(struct sk_buff *skb, int attr, struct lwtunnel_state *lwtstate); /* invoked by lwtunnel_dump_encap() with attr = RTA_ENCAP */
+};
+
+#define MAX_LWTUN_ENCAP_OPS 8
+extern const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUN_ENCAP_OPS];
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+			    unsigned int num);
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+			    unsigned int num);
+int lwtunnel_dst_set_output(struct lwtunnel_state *lwtstate,
+                             struct dst_entry *dst);
+int lwtunnel_build_state(struct rtencap *encap, int len,
+                          struct lwtunnel_state **lws);
+int lwtunnel_dump_encap(struct sk_buff *skb,
+                         struct lwtunnel_state *lwtstate);
+int lwtunnel_encap_size(struct lwtunnel_state *lwtstate);
+
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
+void lwtunnel_state_free(struct lwtunnel_state *lws);
+
+#endif /* __NET_LW_TUNNELS_H */
diff --git a/include/uapi/linux/if_lwtunnel.h b/include/uapi/linux/if_lwtunnel.h
new file mode 100644
index 0000000..28b8497
--- /dev/null
+++ b/include/uapi/linux/if_lwtunnel.h
@@ -0,0 +1,12 @@
+#ifndef _UAPI_IF_LW_TUNNEL_H_
+#define _UAPI_IF_LW_TUNNEL_H_
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+enum tunnel_encap_types { /* values carried in rtencap.type / lwtunnel_state.type */
+	LW_TUNNEL_ENCAP_NONE, /* rejected by lwtunnel_build_state(); a no-op for set_output and dump */
+	LW_TUNNEL_ENCAP_MPLS, /* presumably MPLS encap; its handler is not part of this patch */
+};
+
+#endif /* _UAPI_IF_LW_TUNNEL_H_ */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f..47e5de1 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -308,6 +308,7 @@ enum rtattr_type_t {
 	RTA_VIA,
 	RTA_NEWDST,
 	RTA_PREF,
+	RTA_ENCAP,
 	__RTA_MAX
 };
 
@@ -356,8 +357,13 @@ struct rtvia {
 	__u8			rtvia_addr[0];
 };
 
-/* RTM_CACHEINFO */
+/* RTA_ENCAP */
+struct rtencap { /* payload of the RTA_ENCAP route attribute */
+	__u16	type; /* selects the handler: index into lwtun_encaps[] in lwtunnel_build_state() */
+	__u8	dst[0]; /* variable-length trailing data; layout presumably per encap type - TODO confirm */
+};
 
+/* RTM_CACHEINFO */
 struct rta_cacheinfo {
 	__u32	rta_clntref;
 	__u32	rta_lastuse;
diff --git a/net/Makefile b/net/Makefile
index 3995613..6d51a9d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -5,7 +5,7 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-$(CONFIG_NET)		:= socket.o core/
+obj-$(CONFIG_NET)		:= socket.o lwtunnel.o core/
 
 tmp-$(CONFIG_COMPAT) 		:= compat.o
 obj-$(CONFIG_NET)		+= $(tmp-y)
diff --git a/net/lwtunnel.c b/net/lwtunnel.c
new file mode 100644
index 0000000..e367a60
--- /dev/null
+++ b/net/lwtunnel.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Cumulus Networks, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/rculist.h>
+#include <linux/err.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/lwtunnel.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/rtnetlink.h>
+#include <net/udp.h>
+
+
+/*
+ * lwtunnel_state_alloc - allocate a zero-filled lwtunnel_state
+ * @hdr_len: number of extra bytes to reserve after the state, reachable
+ *           through lws->tunnel.data
+ *
+ * Returns the new state, or NULL if @hdr_len is negative or the allocation
+ * fails.  tunnel.len is not set here and is left at zero.  Release the
+ * result with lwtunnel_state_free().
+ */
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+	/*
+	 * A negative length would shrink (or wrap) the size handed to
+	 * kzalloc() and leave the state under-allocated.
+	 */
+	if (hdr_len < 0)
+		return NULL;
+
+	return kzalloc(sizeof(struct lwtunnel_state) + hdr_len, GFP_KERNEL);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+/* Release a state obtained from lwtunnel_state_alloc(). */
+void lwtunnel_state_free(struct lwtunnel_state *lws)
+{
+	kfree(lws);
+}
+
+const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUN_ENCAP_OPS] __read_mostly;
+
+/*
+ * Install @ops in slot @num of lwtun_encaps[].
+ *
+ * Returns 0 when the slot was empty and now holds @ops, -ERANGE when @num
+ * is outside the table, and -1 when the slot was already occupied.
+ */
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	const struct lwtunnel_encap_ops **slot;
+
+	if (num >= MAX_LWTUN_ENCAP_OPS)
+		return -ERANGE;
+
+	slot = (const struct lwtunnel_encap_ops **)&lwtun_encaps[num];
+
+	/* cmpxchg() hands back the previous value: non-NULL means taken */
+	if (cmpxchg(slot, NULL, ops))
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+/*
+ * Remove @ops from slot @num of lwtun_encaps[].
+ *
+ * Returns 0 when the slot held @ops and was cleared, -ERANGE when @num is
+ * outside the table, and -1 when the slot held something else.
+ * synchronize_net() is called in both the 0 and -1 cases before returning.
+ */
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	const struct lwtunnel_encap_ops **slot;
+	int ret = -1;
+
+	if (num >= MAX_LWTUN_ENCAP_OPS)
+		return -ERANGE;
+
+	slot = (const struct lwtunnel_encap_ops **)&lwtun_encaps[num];
+
+	/* only clear the slot if it still points at the caller's ops */
+	if (cmpxchg(slot, ops, NULL) == ops)
+		ret = 0;
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+/*
+ * lwtunnel_build_state - build tunnel state through the registered handler
+ * @encap: RTA_ENCAP payload; encap->type selects the handler
+ * @len:   passed through unchanged to the handler
+ * @lws:   passed through unchanged to the handler's build_state callback
+ *
+ * Returns the handler's build_state() result, or -EINVAL when the type is
+ * LW_TUNNEL_ENCAP_NONE, is outside lwtun_encaps[], or has no build_state
+ * handler registered.
+ *
+ * NOTE(review): the handler runs inside an RCU read-side section, while
+ * lwtunnel_state_alloc() allocates with GFP_KERNEL - confirm handlers do
+ * not sleep here.
+ */
+int lwtunnel_build_state(struct rtencap *encap, int len,
+			 struct lwtunnel_state **lws)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so reject everything past the end */
+	if (encap->type == LW_TUNNEL_ENCAP_NONE ||
+	    encap->type >= MAX_LWTUN_ENCAP_OPS)
+		return ret;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[encap->type]);
+	if (likely(ops && ops->build_state))
+		ret = ops->build_state(encap, len, lws);
+	rcu_read_unlock();
+
+	/* propagate the handler's result instead of reporting success */
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+/*
+ * lwtunnel_dst_set_output - point dst->output at the encap type's handler
+ * @lwstate: tunnel state; lwstate->type selects the handler
+ * @dst:     dst entry whose output hook is replaced
+ *
+ * Returns 0 when the hook was installed, or when there is nothing to do
+ * (type is LW_TUNNEL_ENCAP_NONE or outside lwtun_encaps[]).  Returns
+ * -EINVAL when no handler with an output callback is registered for the
+ * type; @dst is left untouched in that case.
+ */
+int lwtunnel_dst_set_output(struct lwtunnel_state *lwstate,
+			    struct dst_entry *dst)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so ignore everything past the end */
+	if (lwstate->type == LW_TUNNEL_ENCAP_NONE ||
+	    lwstate->type >= MAX_LWTUN_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwstate->type]);
+	if (likely(ops && ops->output)) {
+		dst->output = ops->output;
+		ret = 0;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_dst_set_output);
+
+/*
+ * lwtunnel_dump_encap - emit the encap state through the type's handler
+ * @skb:     buffer handed to the handler's dump_encap_hdr callback
+ * @lwstate: tunnel state; lwstate->type selects the handler
+ *
+ * The handler is called with RTA_ENCAP as the attribute type.
+ *
+ * Returns the handler's result, 0 when there is nothing to dump (type is
+ * LW_TUNNEL_ENCAP_NONE or outside lwtun_encaps[]), or -EINVAL when no
+ * handler with a dump_encap_hdr callback is registered for the type.
+ */
+int lwtunnel_dump_encap(struct sk_buff *skb, struct lwtunnel_state *lwstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so ignore everything past the end */
+	if (lwstate->type == LW_TUNNEL_ENCAP_NONE ||
+	    lwstate->type >= MAX_LWTUN_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwstate->type]);
+	if (likely(ops && ops->dump_encap_hdr))
+		ret = ops->dump_encap_hdr(skb, RTA_ENCAP, lwstate);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_dump_encap);
+
+/* Report the length recorded in the state's embedded tunnel header. */
+int lwtunnel_encap_size(struct lwtunnel_state *lwstate)
+{
+	const struct lwtunnel_hdr *hdr = &lwstate->tunnel;
+
+	return hdr->len;
+}
+EXPORT_SYMBOL(lwtunnel_encap_size);
+
+MODULE_LICENSE("GPL");
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ