netdev - [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1325080915.26559.43.camel@hakki>
Date:	Wed, 28 Dec 2011 16:01:55 +0200
From:	Matti Vaittinen <matti.vaittinen@....com>
To:	netdev@...r.kernel.org, davem@...emloft.net
Subject: [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC
 3549

Hi Dee Ho!

RFC 3549 states:

Additional flag bits for GET requests on config information in
   the FEC.
          NLM_F_ROOT     Return the complete table instead of a
                         single entry.
          NLM_F_MATCH    Return all entries matching criteria passed in
                         message content.
          NLM_F_ATOMIC   Return an atomic snapshot of the table being
                         referenced.  This may require special
                         privileges because it has the potential to
                         interrupt service in the FE for a longer time.

   Convenience macros for flag bits:
          NLM_F_DUMP     This is NLM_F_ROOT or'ed with NLM_F_MATCH

However, currently requests with NLM_F_ROOT or NLM_F_MATCH or both 
(NLM_F_DUMP) specified will return all the (routing) entries.


To me it sounds that the NLM_F_MATCH was originally meant to
allow user to ask only entries (routes) fulfilling conditions given in 
request. This would further simplify userland applications which need 
to get only certain entries - and I believe that is often the case. 
I believe the current operation which differs from RFC makes netlink 
socket usage even more confusing. (There really is not too much 
up-to-date documentation telling how these requests+all the attributes
work).

This patch makes ipv6 module to return only routes which match 
attributes / filled fields in RTM_GETROUTE, if NLM_F_MATCH is 
specified and NLM_F_ROOT is not. This patch has not been tested, 
and is meant more to be for visualization of what I thought of doing.
If the NLM_F_MATCH support is considered to be good idea, then I 
will check this more thoroughly and send another patch.

I assume this would not break *many* existing userspace applications, 
since specifying NLM_F_MATCH (especially with no NLM_F_ROOT) sounds 
pretty stupid - if no entries should be filtered.

I checked iproute2, and it uses NLM_F_DUMP and does filtering entries 
in userspace - thus it is not affected. 

I guess this same idea could be brought in RTM_GETADDR and RTM_GETLINK 
too? Maybe also on IPv4 side? 

Any comments? 


















This patch demonstrates how RTM_GETROUTE request behaviour for IPv6 
could be changed to better match RFC 3549 when NLM_F_MATCH flag is 
specified.

"NLM_F_MATCH    Return all entries matching criteria passed in
                         message content."

Signed-off-by: Matti Vaittinen <Mazziesaccount@...il.com>
---

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2ad92ca..27313ad 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -139,12 +139,21 @@ extern void			rt6_pmtu_discovery(const struct in6_addr *daddr,
 struct netlink_callback;
 
 struct rt6_rtnl_dump_arg {
+	long maxtbl;
 	struct sk_buff *skb;
 	struct netlink_callback *cb;
 	struct net *net;
 };
+struct rt6_rtnl_match_arg {
+	long maxtbl;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	struct net *net;
+	struct fib6_config *cfg;
+};
 
 extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+extern int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg);
 extern void rt6_ifdown(struct net *net, struct net_device *dev);
 extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
 extern void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2783631..13bea56 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -268,7 +268,26 @@ static void __net_init fib6_tables_init(struct net *net)
 }
 
 #endif
+static int fib6_dump_node_if_match(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+		res = rt6_dump_route_if_match(rt, w->args);
+		if (res < 0) {
+			/* Frame is full or request was not parsed correctly.
+			 * Anyways, suspend walking
+			 */
+			w->leaf = rt;
+			return 1;
+		}
+		WARN_ON(res == 0);
+	}
+	w->leaf = NULL;
+	return 0;
 
+}
 static int fib6_dump_node(struct fib6_walker_t *w)
 {
 	int res;
@@ -292,11 +311,14 @@ static void fib6_dump_end(struct netlink_callback *cb)
 	struct fib6_walker_t *w = (void*)cb->args[2];
 
 	if (w) {
+		if (w->func == fib6_dump_node_if_match)
+			kfree(((struct rt6_rtnl_match_arg *)w->args)->cfg);
 		if (cb->args[4]) {
 			cb->args[4] = 0;
 			fib6_walker_unlink(w);
 		}
 		cb->args[2] = 0;
+		kfree(w->args);
 		kfree(w);
 	}
 	cb->done = (void*)cb->args[3];
@@ -356,16 +378,13 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = sock_net(skb->sk);
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
-	struct rt6_rtnl_dump_arg arg;
+	struct rt6_rtnl_dump_arg *arg;
 	struct fib6_walker_t *w;
 	struct fib6_table *tb;
 	struct hlist_node *node;
 	struct hlist_head *head;
 	int res = 0;
 
-	s_h = cb->args[0];
-	s_e = cb->args[1];
-
 	w = (void *)cb->args[2];
 	if (!w) {
 		/* New dump:
@@ -381,17 +400,57 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 		w = kzalloc(sizeof(*w), GFP_ATOMIC);
 		if (!w)
 			return -ENOMEM;
-		w->func = fib6_dump_node;
+		if (cb->nlh && (cb->nlh->nlmsg_flags & NLM_F_MATCH) &&
+		    !(cb->nlh->nlmsg_flags & NLM_F_ROOT) &&
+		    nlmsg_len(cb->nlh) >= sizeof(struct rtmsg)) {
+
+			struct rtmsg *rtm = nlmsg_data(cb->nlh);
+
+			cb->done = fib6_dump_done;
+			w->args = kzalloc(sizeof(struct rt6_rtnl_match_arg), GFP_ATOMIC);
+			if (!w->args) {
+				kfree(w);
+				return -ENOMEM;
+			}
+			((struct rt6_rtnl_match_arg *)w->args)->cfg =
+			    kzalloc(sizeof(struct fib6_config), GFP_ATOMIC);
+
+			if (!((struct rt6_rtnl_match_arg *)w->args)->cfg) {
+				kfree(w->args);
+				kfree(w);
+				return -ENOMEM;
+			}
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+			if (rtm->rtm_table <= FIB6_TABLE_HASHSZ) {
+				cb->args[0] = rtm->rtm_table;
+				((struct rt6_rtnl_match_arg *)w->args)->maxtbl =
+				    (rtm->rtm_table) ? rtm->rtm_table+1 : FIB6_TABLE_HASHSZ;
+			}
+#endif
+
+			((struct rt6_rtnl_match_arg *)w->args)->cfg->fc_table = 0xFFFFFFFF;
+			w->func = fib6_dump_node_if_match;
+		} else {
+			w->func = fib6_dump_node;
+			w->args = kzalloc(sizeof(struct rt6_rtnl_dump_arg), GFP_ATOMIC);
+			if (!w->args) {
+				kfree(w);
+				return -ENOMEM;
+			}
+			((struct rt6_rtnl_dump_arg *)w->args)->maxtbl = FIB6_TABLE_HASHSZ;
+		}
 		cb->args[2] = (long)w;
 	}
+	arg = (struct rt6_rtnl_dump_arg *)w->args;
 
-	arg.skb = skb;
-	arg.cb = cb;
-	arg.net = net;
-	w->args = &arg;
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+	arg->skb = skb;
+	arg->cb = cb;
+	arg->net = net;
 
 	rcu_read_lock();
-	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
+	for (h = s_h; h < arg->maxtbl; h++, s_e = 0) {
 		e = 0;
 		head = &net->ipv6.fib_table_hash[h];
 		hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 35b07cc..a71d44d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2350,7 +2350,154 @@ static inline size_t rt6_nlmsg_size(void)
 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
 	       + nla_total_size(sizeof(struct rta_cacheinfo));
 }
+static int rt6_cmp_fill_node(struct net *net,
+		     struct sk_buff *skb, struct rt6_info *rt,
+		     u32 pid,
+		     int prefix, struct fib6_config *cfg)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr *nlh;
+	long expires;
+	u32 table;
+	struct neighbour *n = NULL;
+	u32 seq;
+	struct in6_addr gwaddr;
+
+
+	if (prefix) {	/* user wants prefix routes only */
+		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
+			/* success since this is not a prefix route */
+			return 1;
+		}
+	}
+	if (cfg->fc_dst_len && cfg->fc_dst_len != rt->rt6i_dst.plen)
+		return 1;
+
+	if (cfg->fc_src_len && cfg->fc_src_len != rt->rt6i_src.plen)
+		return 1;
+	cfg->fc_flags = ((~RTF_UP)&cfg->fc_flags);
+	if (cfg->fc_flags && cfg->fc_flags != rt->rt6i_flags)
+		return 1;
+
+	if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+		return 1;
+
+	if (cfg->fc_flags & RTF_GATEWAY) {
+		rcu_read_lock();
+		n = dst_get_neighbour_noref(&rt->dst);
+		if (!n) {
+			rcu_read_unlock();
+			return 1;
+		}
+		gwaddr = *(struct in6_addr *)&(n->primary_key);
+		rcu_read_unlock();
+		if (memcmp(&gwaddr, &cfg->fc_gateway, 16))
+			return 1;
+	}
+
+	if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+		return 1;
+
+	if (cfg->fc_ifindex)
+	   if (!rt->dst.dev || rt->rt6i_dev->ifindex != cfg->fc_ifindex)
+			return 1;
+
+	if (memcmp(&in6addr_any, &cfg->fc_dst, sizeof(struct in6_addr)))
+		if (memcmp(&cfg->fc_dst, &rt->rt6i_dst.addr, sizeof(struct in6_addr)))
+			return 1;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (memcmp(&in6addr_any, &cfg->fc_src, sizeof(struct in6_addr)))
+		if (memcmp(&cfg->fc_src, &rt->rt6i_src.addr, sizeof(struct in6_addr)))
+			return 1;
+#endif
+	if (memcmp(&in6addr_any, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+		if (!rt->rt6i_prefsrc.plen ||
+		    memcmp(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+			return 1;
+	/* All values checked, now allocate and fill NLMSG */
+	seq = cfg->fc_nlinfo.nlh->nlmsg_seq;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family = AF_INET6;
+	rtm->rtm_dst_len = rt->rt6i_dst.plen;
+	rtm->rtm_src_len = rt->rt6i_src.plen;
+	rtm->rtm_tos = 0;
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	NLA_PUT_U32(skb, RTA_TABLE, table);
+	if (rt->rt6i_flags&RTF_REJECT)
+		rtm->rtm_type = RTN_UNREACHABLE;
+	else if (rt->rt6i_flags&RTF_LOCAL)
+		rtm->rtm_type = RTN_LOCAL;
+	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+		rtm->rtm_type = RTN_LOCAL;
+	else
+		rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_protocol = rt->rt6i_protocol;
+	if (rt->rt6i_flags&RTF_DYNAMIC)
+		rtm->rtm_protocol = RTPROT_REDIRECT;
+	else if (rt->rt6i_flags & RTF_ADDRCONF)
+		rtm->rtm_protocol = RTPROT_KERNEL;
+	else if (rt->rt6i_flags&RTF_DEFAULT)
+		rtm->rtm_protocol = RTPROT_RA;
+
+	if (rt->rt6i_flags&RTF_CACHE)
+		rtm->rtm_flags |= RTM_F_CLONED;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
+	if (rtm->rtm_src_len)
+		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
+	if (rt->rt6i_prefsrc.plen) {
+		struct in6_addr saddr_buf;
+		saddr_buf = rt->rt6i_prefsrc.addr;
+		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+	}
 
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+		goto nla_put_failure;
+
+	rcu_read_lock();
+	n = dst_get_neighbour_noref(&rt->dst);
+	if (n)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
+	rcu_read_unlock();
+
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
+
+	if (!(rt->rt6i_flags & RTF_EXPIRES))
+		expires = 0;
+	else if (rt->rt6i_expires - jiffies < INT_MAX)
+		expires = rt->rt6i_expires - jiffies;
+	else
+		expires = INT_MAX;
+
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+
+}
 static int rt6_fill_node(struct net *net,
 			 struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
@@ -2478,7 +2625,34 @@ nla_put_failure:
 	nlmsg_cancel(skb, nlh);
 	return -EMSGSIZE;
 }
+int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_rtnl_match_arg *arg = (struct rt6_rtnl_match_arg *) p_arg;
+	int prefix;
+	int err;
 
+	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
+		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
+	} else
+		prefix = 0;
+	/* this is a bit of a hack. But we do not want to
+	 * evaluate config struct for same nlmsg
+	 * for each route we fetch from fib.
+	 * It's enough to do it once.
+	 */
+	if (0xFFFFFFFF == arg->cfg->fc_table) {
+		arg->cfg->fc_table = 0;
+		err = rtm_to_fib6_config(arg->cb->skb,
+		    (struct nlmsghdr *)arg->cb->nlh, arg->cfg);
+		if (err)
+			return err;
+	}
+	return rt6_cmp_fill_node(arg->net,
+		     arg->skb, rt,
+		     NETLINK_CB(arg->cb->skb).pid,
+		     prefix, arg->cfg);
+}
 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
@@ -2489,7 +2663,6 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 	} else
 		prefix = 0;
-
 	return rt6_fill_node(arg->net,
 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html