lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 1 Jun 2015 17:46:14 +0100
From:	Robert Shearman <rshearma@...cade.com>
To:	<netdev@...r.kernel.org>
CC:	"Eric W. Biederman" <ebiederm@...ssion.com>,
	roopa <roopa@...ulusnetworks.com>, Thomas Graf <tgraf@...g.ch>,
	Robert Shearman <rshearma@...cade.com>
Subject: [RFC net-next 2/3] ipv4: storing and retrieval of per-nexthop encap

Parse RTA_ENCAP attribute for one path and multipath routes. The encap
length is stored in a newly added field to fib_nh, nh_encap_len,
although this is added to a padding hole in the structure so that it
doesn't increase the size at all. The encap data itself is stored at
the end of the array of nexthops. Whilst this means that retrieval
isn't optimal, especially if there are multiple nexthops, this avoids
the memory cost of an extra pointer, as well as any potential change
to the cache or instruction layout that could cause a performance
impact.

Currently, the dst structure allocated to represent the destination of
the packet and used for retrieving the encap by the encap-type
interface has been grown through the addition of the rt_encap_len and
rt_encap fields. This isn't desirable and could be fixed by defining a
new destination type with operations copied from the normal case,
other than the addition of the get_encap operation.

Signed-off-by: Robert Shearman <rshearma@...cade.com>
---
 include/net/ip_fib.h     |   2 +
 include/net/route.h      |   3 +
 net/ipv4/fib_frontend.c  |   3 +
 net/ipv4/fib_lookup.h    |   2 +
 net/ipv4/fib_semantics.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/route.c         |  24 +++++++
 6 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 54271ed0ed45..a06cec5eb3aa 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -44,6 +44,7 @@ struct fib_config {
 	u32			fc_flow;
 	u32			fc_nlflags;
 	struct nl_info		fc_nlinfo;
+	struct nlattr *fc_encap;
  };
 
 struct fib_info;
@@ -75,6 +76,7 @@ struct fib_nh {
 	struct fib_info		*nh_parent;
 	unsigned int		nh_flags;
 	unsigned char		nh_scope;
+	unsigned char		nh_encap_len;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			nh_weight;
 	int			nh_power;
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03afb6a..e8b58914c4c1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -64,6 +64,9 @@ struct rtable {
 	/* Miscellaneous cached information */
 	u32			rt_pmtu;
 
+	unsigned int		rt_encap_len;
+	void			*rt_encap;
+
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
 };
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e6e6eb..aa538ab7e3b9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -656,6 +656,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 		case RTA_TABLE:
 			cfg->fc_table = nla_get_u32(attr);
 			break;
+		case RTA_ENCAP:
+			cfg->fc_encap = attr;
+			break;
 		}
 	}
 
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c6211ed60b03..003318c51ae8 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -34,6 +34,8 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 		  unsigned int);
 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
 	       u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
+const void *fib_get_nh_encap(const struct fib_info *fi,
+			     const struct fib_nh *nh);
 
 static inline void fib_result_assign(struct fib_result *res,
 				     struct fib_info *fi)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 28ec3c1823bf..db466b636241 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -257,6 +257,9 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 	const struct fib_nh *onh = ofi->fib_nh;
 
 	for_nexthops(fi) {
+		const void *onh_encap = fib_get_nh_encap(fi, nh);
+		const void *nh_encap = fib_get_nh_encap(fi, nh);
+
 		if (nh->nh_oif != onh->nh_oif ||
 		    nh->nh_gw  != onh->nh_gw ||
 		    nh->nh_scope != onh->nh_scope ||
@@ -266,7 +269,10 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
-		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
+		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD) ||
+		    nh->nh_encap_len != onh->nh_encap_len ||
+		    memcmp(nh_encap, onh_encap, nh->nh_encap_len)
+			)
 			return -1;
 		onh++;
 	} endfor_nexthops(fi);
@@ -374,6 +380,11 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
 		/* may contain flow and gateway attribute */
 		nhsize += 2 * nla_total_size(4);
 
+		for_nexthops(fi) {
+			if (nh->nh_encap_len)
+				nhsize += nla_total_size(nh->nh_encap_len);
+		} endfor_nexthops(fi);
+
 		/* all nexthops are packed in a nested attribute */
 		payload += nla_total_size(fi->fib_nhs * nhsize);
 	}
@@ -434,6 +445,83 @@ static int fib_detect_death(struct fib_info *fi, int order,
 	return 1;
 }
 
+static int fib_total_encap(struct fib_config *cfg)
+{
+	struct net *net = cfg->fc_nlinfo.nl_net;
+	int total_encap_len = 0;
+
+	if (cfg->fc_mp) {
+		int remaining = cfg->fc_mp_len;
+		struct rtnexthop *rtnh = cfg->fc_mp;
+
+		while (rtnh_ok(rtnh, remaining)) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+			int attrlen;
+
+			attrlen = rtnh_attrlen(rtnh);
+			nla = nla_find(attrs, attrlen, RTA_ENCAP);
+			if (nla) {
+				struct net_device *dev;
+				int len;
+
+				dev = __dev_get_by_index(net,
+							 rtnh->rtnh_ifindex);
+				if (!dev)
+					return -EINVAL;
+
+				/* Determine space required */
+				len = rtnl_parse_encap(dev, nla, NULL);
+				if (len < 0)
+					return len;
+
+				total_encap_len += len;
+			}
+
+			rtnh = rtnh_next(rtnh, &remaining);
+		}
+	} else {
+		if (cfg->fc_encap) {
+			struct net_device *dev;
+			int len;
+
+			dev = __dev_get_by_index(net, cfg->fc_oif);
+			if (!dev)
+				return -EINVAL;
+
+			/* Determine space required */
+			len = rtnl_parse_encap(dev, cfg->fc_encap, NULL);
+			if (len < 0)
+				return len;
+
+			total_encap_len += len;
+		}
+	}
+
+	return total_encap_len;
+}
+
+static void *__fib_get_nh_encap(const struct fib_info *fi,
+				const struct fib_nh *the_nh)
+{
+	char *cur_encap_ptr = (char *)(fi->fib_nh + fi->fib_nhs);
+
+	for_nexthops(fi) {
+		if (nh == the_nh)
+			return cur_encap_ptr;
+		cur_encap_ptr += nh->nh_encap_len;
+	} endfor_nexthops(fi);
+
+	return NULL;
+}
+
+const void *fib_get_nh_encap(const struct fib_info *fi, const struct fib_nh *nh)
+{
+	if (!nh->nh_encap_len)
+		return NULL;
+
+	return __fib_get_nh_encap(fi, nh);
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
@@ -475,6 +563,26 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 			if (nexthop_nh->nh_tclassid)
 				fi->fib_net->ipv4.fib_num_tclassid_users++;
 #endif
+			nla = nla_find(attrs, attrlen, RTA_ENCAP);
+			if (nla) {
+				struct net *net = cfg->fc_nlinfo.nl_net;
+				struct net_device *dev;
+				void *nh_encap;
+				int len;
+
+				dev = __dev_get_by_index(net,
+							 nexthop_nh->nh_oif);
+				if (!dev)
+					return -EINVAL;
+
+				nh_encap = __fib_get_nh_encap(fi, nexthop_nh);
+
+				/* Fill in nh encap */
+				len = rtnl_parse_encap(dev, nla, nh_encap);
+				if (len < 0)
+					return len;
+				nexthop_nh->nh_encap_len = len;
+			}
 		}
 
 		rtnh = rtnh_next(rtnh, &remaining);
@@ -495,6 +603,17 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
+	if (cfg->fc_encap) {
+		const void *nh_encap = fib_get_nh_encap(fi, fi->fib_nh);
+
+		if (!fi->fib_nh->nh_oif ||
+		    rtnl_match_encap(fi->fib_nh->nh_dev,
+				     cfg->fc_encap,
+				     fi->fib_nh->nh_encap_len,
+				     nh_encap))
+			return 1;
+	}
+
 	if (cfg->fc_oif || cfg->fc_gw) {
 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
 		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
@@ -530,6 +649,17 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
 #endif
+			nla = nla_find(attrs, attrlen, RTA_ENCAP);
+			if (nla) {
+				const void *nh_encap = fib_get_nh_encap(fi, nh);
+
+				if (!nh->nh_oif ||
+				    rtnl_match_encap(nh->nh_dev,
+						     cfg->fc_encap,
+						     nh->nh_encap_len,
+						     nh_encap))
+					return 1;
+			}
 		}
 
 		rtnh = rtnh_next(rtnh, &remaining);
@@ -760,6 +890,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	struct fib_info *ofi;
 	int nhs = 1;
 	struct net *net = cfg->fc_nlinfo.nl_net;
+	int encap_len;
 
 	if (cfg->fc_type > RTN_MAX)
 		goto err_inval;
@@ -798,7 +929,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 			goto failure;
 	}
 
-	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+	encap_len = fib_total_encap(cfg);
+	if (encap_len < 0) {
+		err = encap_len;
+		goto failure;
+	}
+
+	fi = kzalloc(sizeof(*fi) + nhs * sizeof(struct fib_nh) + encap_len,
+		     GFP_KERNEL);
 	if (!fi)
 		goto failure;
 	fib_info_cnt++;
@@ -886,6 +1024,26 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
 #endif
+		if (cfg->fc_encap) {
+			struct net_device *dev;
+			void *nh_encap;
+			int len;
+
+			err = -EINVAL;
+			dev = __dev_get_by_index(net, nh->nh_oif);
+			if (!dev)
+				goto failure;
+
+			nh_encap = __fib_get_nh_encap(fi, nh);
+
+			/* Fill in nh encap */
+			len = rtnl_parse_encap(dev, cfg->fc_encap, nh_encap);
+			if (len < 0 || len > sizeof(nh->nh_encap_len) * 8) {
+				err = len;
+				goto failure;
+			}
+			nh->nh_encap_len = len;
+		}
 	}
 
 	if (fib_props[cfg->fc_type].error) {
@@ -1023,6 +1181,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
+		const void *nh_encap;
+
 		if (fi->fib_nh->nh_gw &&
 		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
 			goto nla_put_failure;
@@ -1034,6 +1194,12 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
 			goto nla_put_failure;
 #endif
+
+		nh_encap = fib_get_nh_encap(fi, &fi->fib_nh[0]);
+		if (nh_encap && rtnl_fill_encap(fi->fib_nh[0].nh_dev, skb,
+						fi->fib_nh[0].nh_encap_len,
+						nh_encap))
+			goto nla_put_failure;
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
@@ -1045,6 +1211,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 
 		for_nexthops(fi) {
+			const void *nh_encap;
+
 			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
 			if (!rtnh)
 				goto nla_put_failure;
@@ -1061,6 +1229,13 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
 				goto nla_put_failure;
 #endif
+
+			nh_encap = fib_get_nh_encap(fi, nh);
+			if (nh_encap && rtnl_fill_encap(nh->nh_dev, skb,
+							nh->nh_encap_len,
+							nh_encap))
+				goto nla_put_failure;
+
 			/* length of rtnetlink header + attributes */
 			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6055984c307..d52fa3d168a5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -110,6 +110,8 @@
 #endif
 #include <net/secure_seq.h>
 
+#include "fib_lookup.h"
+
 #define RT_FL_TOS(oldflp4) \
 	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
 
@@ -138,6 +140,8 @@ static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu);
 static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
+static int		ipv4_dst_get_encap(const struct dst_entry *dst,
+					   const void **encap);
 static void		ipv4_dst_destroy(struct dst_entry *dst);
 
 static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -163,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.redirect =		ip_do_redirect,
 	.local_out =		__ip_local_out,
 	.neigh_lookup =		ipv4_neigh_lookup,
+	.get_encap =		ipv4_dst_get_encap,
 };
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
@@ -1145,6 +1150,15 @@ static void ipv4_link_failure(struct sk_buff *skb)
 		dst_set_expires(&rt->dst, 0);
 }
 
+static int ipv4_dst_get_encap(const struct dst_entry *dst,
+			      const void **encap)
+{
+	const struct rtable *rt = (const struct rtable *)dst;
+
+	*encap = rt->rt_encap;
+	return rt->rt_encap_len;
+}
+
 static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
 {
 	pr_debug("%s: %pI4 -> %pI4, %s\n",
@@ -1394,6 +1408,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 
 	if (fi) {
 		struct fib_nh *nh = &FIB_RES_NH(*res);
+		const void *nh_encap;
 
 		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
 			rt->rt_gateway = nh->nh_gw;
@@ -1403,6 +1418,15 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
+
+		nh_encap = fib_get_nh_encap(fi, nh);
+		if (unlikely(nh_encap)) {
+			rt->rt_encap = kmemdup(nh_encap, nh->nh_encap_len,
+					       GFP_KERNEL);
+			if (rt->rt_encap)
+				rt->rt_encap_len = nh->nh_encap_len;
+		}
+
 		if (unlikely(fnhe))
 			cached = rt_bind_exception(rt, fnhe, daddr);
 		else if (!(rt->dst.flags & DST_NOCACHE))
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ