lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1437747419-8442-1-git-send-email-nicolas.dichtel@6wind.com>
Date:	Fri, 24 Jul 2015 16:16:59 +0200
From:	Nicolas Dichtel <nicolas.dichtel@...nd.com>
To:	davem@...emloft.net
Cc:	netdev@...r.kernel.org, roopa@...ulusnetworks.com, tgraf@...g.ch,
	Nicolas Dichtel <nicolas.dichtel@...nd.com>
Subject: [PATCH net-next v2] route: allow to route in a peer netns via lwt framework

This patch takes advantage of the newly added lwtunnel framework to
allow the user to set routes that point to a peer netns.

Packets are injected into the peer netns via its loopback device. This
works only when the output device of the route is 'lo'.

Example:
ip route add 40.1.1.1/32 encap netns nsid 5 via dev lo

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@...nd.com>
---

v2: rework loopback handling part (update stats and call skb_dst_force())
    fix ipv6 processing
    check lwtunnel type before converting data to a nsid

 drivers/net/loopback.c        | 33 +++++++++++++++++++++------
 include/net/lwtunnel.h        | 27 ++++++++++++++++++++++
 include/uapi/linux/lwtunnel.h |  1 +
 net/core/net_namespace.c      | 52 +++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/route.c              |  9 ++++++--
 5 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index c76283c2f84a..4358256ff94e 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -57,6 +57,7 @@
 #include <linux/percpu.h>
 #include <net/net_namespace.h>
 #include <linux/u64_stats_sync.h>
+#include <net/lwtunnel.h>
 
 struct pcpu_lstats {
 	u64			packets;
@@ -71,29 +72,47 @@ struct pcpu_lstats {
 static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 				 struct net_device *dev)
 {
+	int nsid = skb_lwt_netns_info(skb);
 	struct pcpu_lstats *lb_stats;
-	int len;
-
-	skb_orphan(skb);
+	struct net *peernet = NULL;
+	int len, ret;
 
 	/* Before queueing this packet to netif_rx(),
 	 * make sure dst is refcounted.
 	 */
 	skb_dst_force(skb);
 
-	skb->protocol = eth_type_trans(skb, dev);
+	if (nsid != NETNSA_NSID_NOT_ASSIGNED) {
+		peernet = get_net_ns_by_id(dev_net(dev), nsid);
+		if (!peernet) {
+			kfree_skb(skb);
+			goto end;
+		}
+
+		/* it's OK to use per_cpu_ptr() because BHs are off */
+		lb_stats = this_cpu_ptr(peernet->loopback_dev->lstats);
+	} else {
+		skb_orphan(skb);
 
-	/* it's OK to use per_cpu_ptr() because BHs are off */
-	lb_stats = this_cpu_ptr(dev->lstats);
+		skb->protocol = eth_type_trans(skb, dev);
+		/* it's OK to use per_cpu_ptr() because BHs are off */
+		lb_stats = this_cpu_ptr(dev->lstats);
+	}
 
+	/* read the length first: skb is no longer ours once handed off */
 	len = skb->len;
-	if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
+	ret = peernet ? dev_forward_skb(peernet->loopback_dev, skb) :
+			netif_rx(skb);
+	if (likely(ret == NET_RX_SUCCESS)) {
 		u64_stats_update_begin(&lb_stats->syncp);
 		lb_stats->bytes += len;
 		lb_stats->packets++;
 		u64_stats_update_end(&lb_stats->syncp);
 	}
 
+end:
+	if (peernet)
+		put_net(peernet);
 	return NETDEV_TX_OK;
 }
 
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index b02039081b04..78376da1afa2 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -5,7 +5,9 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
+#include <linux/net_namespace.h>
 #include <net/route.h>
+#include <net/ip6_fib.h>
 
 #define LWTUNNEL_HASH_BITS   7
 #define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
@@ -147,4 +149,29 @@ static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
 
 #endif
 
+static inline u32 *lwt_netns_info(struct lwtunnel_state *lwtstate)
+{
+	return (u32 *)lwtstate->data;	/* netns encap payload: the peer nsid */
+}
+
+static inline int skb_lwt_netns_info(struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct rtable *rt = (struct rtable *)skb_dst(skb);
+
+		/* type is an enum value, not a flag: compare, don't mask */
+		if (rt && rt->rt_lwtstate &&
+		    rt->rt_lwtstate->type == LWTUNNEL_ENCAP_NETNS)
+			return *lwt_netns_info(rt->rt_lwtstate);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+
+		/* same exact-type check for an IPv6 route */
+		if (rt6 && rt6->rt6i_lwtstate &&
+		    rt6->rt6i_lwtstate->type == LWTUNNEL_ENCAP_NETNS)
+			return *lwt_netns_info(rt6->rt6i_lwtstate);
+	}
+	/* no netns encap state on the route attached to this skb */
+	return NETNSA_NSID_NOT_ASSIGNED;
+}
 #endif /* __NET_LWTUNNEL_H */
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 31377bbea3f8..6715e7a1b335 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -7,6 +7,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_NONE,
 	LWTUNNEL_ENCAP_MPLS,
 	LWTUNNEL_ENCAP_IP,
+	LWTUNNEL_ENCAP_NETNS,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..c1267aac373d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -20,6 +20,7 @@
 #include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/lwtunnel.h>
 
 /*
  *	Our network namespace constructor/destructor lists
@@ -725,6 +726,56 @@ out:
 	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
 }
 
+static int lwt_netns_build_state(struct net_device *dev, struct nlattr *nla,
+				 struct lwtunnel_state **ts)
+{
+	struct nlattr *tb[NETNSA_MAX + 1];
+	struct lwtunnel_state *newts;
+	int *nsid;
+	int ret;
+
+	ret = nla_parse_nested(tb, NETNSA_MAX, nla, rtnl_net_policy);
+	if (ret < 0)
+		return ret;
+	/* nsid is mandatory and must be a real id, not a negative sentinel */
+	if (!tb[NETNSA_NSID] || nla_get_s32(tb[NETNSA_NSID]) < 0)
+		return -EINVAL;
+
+	newts = lwtunnel_state_alloc(sizeof(*nsid));
+	if (!newts)
+		return -ENOMEM;
+
+	newts->len = sizeof(*nsid);
+	nsid = lwt_netns_info(newts);
+	*nsid = nla_get_s32(tb[NETNSA_NSID]);
+	newts->type = LWTUNNEL_ENCAP_NETNS;
+
+	*ts = newts;
+	return 0;
+}
+
+static int lwt_netns_fill_encap_info(struct sk_buff *skb,
+				     struct lwtunnel_state *lwtstate)
+{
+	int *nsid = lwt_netns_info(lwtstate);
+
+	/* a failed nla_put means "no room left in skb", not "out of memory" */
+	if (nla_put_s32(skb, NETNSA_NSID, *nsid))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int lwt_netns_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	return nla_total_size(sizeof(s32));	/* one NETNSA_NSID attribute */
+}
+
+static const struct lwtunnel_encap_ops lwt_netns_ops = {
+	.build_state = lwt_netns_build_state,	/* parse the nsid attribute */
+	.fill_encap = lwt_netns_fill_encap_info,	/* emit NETNSA_NSID */
+	.get_encap_size = lwt_netns_encap_nlsize,	/* NETNSA_NSID attr size */
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -762,6 +813,7 @@ static int __init net_ns_init(void)
 	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
 		      NULL);
 
+	lwtunnel_encap_add_ops(&lwt_netns_ops, LWTUNNEL_ENCAP_NETNS);
 	return 0;
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c9b2b9fe83fc..894cb18cd8ca 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1796,10 +1796,13 @@ int ip6_route_add(struct fib6_config *cfg)
 	rt->rt6i_metric = cfg->fc_metric;
 
 	/* We cannot add true routes via loopback here,
-	   they would result in kernel looping; promote them to reject routes
+	 * they would result in kernel looping; promote them to reject routes.
+	 * Exception: routes that point to a peer netns.
 	 */
 	if ((cfg->fc_flags & RTF_REJECT) ||
 	    (dev && (dev->flags & IFF_LOOPBACK) &&
+	     (!rt->rt6i_lwtstate ||
+	      rt->rt6i_lwtstate->type != LWTUNNEL_ENCAP_NETNS) &&
 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
 	     !(cfg->fc_flags & RTF_LOCAL))) {
 		/* hold loopback dev/idev if we haven't done so. */
@@ -2880,7 +2883,9 @@ static int rt6_fill_node(struct net *net,
 	}
 	else if (rt->rt6i_flags & RTF_LOCAL)
 		rtm->rtm_type = RTN_LOCAL;
-	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
+	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK) &&
+		(!rt->rt6i_lwtstate ||
+		 rt->rt6i_lwtstate->type != LWTUNNEL_ENCAP_NETNS))
 		rtm->rtm_type = RTN_LOCAL;
 	else
 		rtm->rtm_type = RTN_UNICAST;
-- 
2.4.2

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ