lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20110505.163614.212671515.davem@davemloft.net>
Date:	Thu, 05 May 2011 16:36:14 -0700 (PDT)
From:	David Miller <davem@...emloft.net>
To:	netdev@...r.kernel.org
CC:	tgraf@...g.ch, jpirko@...hat.com, herbert@...dor.apana.org.au,
	eric.dumazet@...il.com
Subject: [PATCH v6 BONUS 4/3] ipv4: Store rtable entries directly in FIB


Ok, here is the fun patch showing the scheme I'm working on.  Two
things are going on here.

First, we store pre-constructed rtable entries, on demand, inside of
the routing table objects themselves (a new nh_rtable pointer in each
struct fib_nh).

Second, we get rid of RT_TABLE_LOCAL and load all routes equally
into RT_TABLE_MAIN.

Signed-off-by: David S. Miller <davem@...emloft.net>

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 10422ef..f3c9598 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -44,6 +44,7 @@ struct fib_config {
  };
 
 struct fib_info;
+struct rtable;
 
 struct fib_nh {
 	struct net_device	*nh_dev;
@@ -62,6 +63,7 @@ struct fib_nh {
 	__be32			nh_gw;
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
+	struct rtable		*nh_rtable;
 };
 
 /*
@@ -200,10 +202,6 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 {
 	struct fib_table *table;
 
-	table = fib_get_table(net, RT_TABLE_LOCAL);
-	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
-		return 0;
-
 	table = fib_get_table(net, RT_TABLE_MAIN);
 	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
 		return 0;
diff --git a/include/net/route.h b/include/net/route.h
index 70155fb..04e7197 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -109,6 +109,7 @@ extern int		ip_rt_init(void);
 extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
 				       __be32 src, struct net_device *dev);
 extern void		rt_cache_flush(struct net *net, int how);
+extern struct rtable *ip_route_output_new(struct net *, struct flowi4 *flp);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
 					   struct sock *sk);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 33bbbda..24e67d8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -155,7 +155,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
 	res.r = NULL;
 #endif
 
-	local_table = fib_get_table(net, RT_TABLE_LOCAL);
+	local_table = fib_get_table(net, RT_TABLE_MAIN);
 	if (local_table) {
 		ret = RTN_UNICAST;
 		rcu_read_lock();
@@ -662,11 +662,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		},
 	};
 
-	if (type == RTN_UNICAST)
-		tb = fib_new_table(net, RT_TABLE_MAIN);
-	else
-		tb = fib_new_table(net, RT_TABLE_LOCAL);
-
+	tb = fib_new_table(net, RT_TABLE_MAIN);
 	if (tb == NULL)
 		return;
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 641a5a2..c37ebd3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -148,6 +148,10 @@ static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
 
+	change_nexthops(fi) {
+		ip_rt_put(nexthop_nh->nh_rtable);
+		nexthop_nh->nh_rtable = NULL;
+	} endfor_nexthops(fi);
 	if (fi->fib_metrics != (u32 *) dst_default_metrics)
 		kfree(fi->fib_metrics);
 	kfree(fi);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1e67624..2f77d28 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1861,6 +1861,68 @@ out:
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
+struct rtable *ip_route_output_new(struct net *net, struct flowi4 *fl4)
+{
+	struct net_device *dev_out = NULL;
+	u32 tos	= RT_FL_TOS(fl4);
+	unsigned int flags = 0;
+	struct fib_result res;
+	struct rtable *rth;
+	int orig_oif;
+
+	res.fi = NULL;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
+
+	orig_oif = fl4->flowi4_oif;
+
+	fl4->flowi4_iif = net->loopback_dev->ifindex;
+	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
+			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+
+	rcu_read_lock();
+	if (fib_lookup(net, fl4, &res)) {
+		rth = ERR_PTR(-ENETUNREACH);
+		goto out;
+	}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
+		fib_select_multipath(&res);
+	else
+#endif
+	if (!res.prefixlen && res.table->tb_num_default > 1 &&
+	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
+		fib_select_default(&res);
+
+	if (!fl4->saddr)
+		fl4->saddr = FIB_RES_PREFSRC(net, res);
+
+	dev_out = FIB_RES_DEV(res);
+	fl4->flowi4_oif = dev_out->ifindex;
+
+	rth = FIB_RES_NH(res).nh_rtable;
+	if (!rth) {
+		if (res.type == RTN_LOCAL)
+			flags |= RTCF_LOCAL;
+		rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
+		if (!IS_ERR(rth))
+			rth = rt_finalize(rth, NULL);
+		if (!IS_ERR(rth))
+			FIB_RES_NH(res).nh_rtable = rth;
+	}
+
+	if (!IS_ERR(rth))
+		atomic_inc(&rth->dst.__refcnt);
+
+out:
+	rcu_read_unlock();
+	return rth;
+}
+EXPORT_SYMBOL_GPL(ip_route_output_new);
+
 static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
 {
 	return NULL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 544f435..9bb827e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -929,7 +929,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				   faddr, saddr, dport, inet->inet_sport);
 
 		security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
-		rt = ip_route_output_flow(net, &fl4, sk);
+		rt = ip_route_output_new(net, &fl4);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
 			rt = NULL;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ