lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <592c763828171c414e8927878b1a22027e33dee7.1574071944.git.pabeni@redhat.com>
Date:   Mon, 18 Nov 2019 12:01:30 +0100
From:   Paolo Abeni <pabeni@...hat.com>
To:     netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Willem de Bruijn <willemdebruijn.kernel@...il.com>,
        Edward Cree <ecree@...arflare.com>
Subject: [PATCH net-next v2 2/2] ipv4: use dst hint for ipv4 list receive

This is similar to the previous change, with some additional ipv4-specific
quirks. Even when using the route hint we still have to perform
additional per-packet checks on source address validity: a new
helper is added to wrap them.

Moreover, the ipv4 route lookup, even in the absence of policy routing,
may depend on the packet's ToS, so we cache that value, too.

Explicitly avoid hints for local broadcast: this simplifies the code,
and broadcasts are a slower path anyway.

UDP flood performance vs recvmmsg() receiver:

vanilla		patched		delta
Kpps		Kpps		%
1683		1833		+8

In the worst case scenario - each packet has a different
destination address - the performance delta is within noise
range.

v1 -> v2:
 - fix build issue with !CONFIG_IP_MULTIPLE_TABLES

Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
 include/net/route.h | 11 +++++++++++
 net/ipv4/ip_input.c | 38 +++++++++++++++++++++++++++++++++-----
 net/ipv4/route.c    | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 6c516840380d..f7a8a52318cd 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -185,6 +185,17 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
 		       u8 tos, struct net_device *devin,
 		       struct fib_result *res);
 
+struct ip_route_input_hint {
+	unsigned long	refdst;	/* skb->_skb_refdst of the cached route */
+	__be32		daddr;	/* iph->daddr the route was looked up for */
+	u8		tos;	/* iph->tos; unsigned, as in struct iphdr */
+	bool		local;	/* cached rt_type == RTN_LOCAL */
+};
+
+int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
+		      u8 tos, struct net_device *devin,
+		      struct ip_route_input_hint *hint);
+
 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 				 u8 tos, struct net_device *devin)
 {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 24a95126e698..25f6fcc65380 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -305,7 +305,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
 INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
-			      struct sk_buff *skb, struct net_device *dev)
+			      struct sk_buff *skb, struct net_device *dev,
+			      struct ip_route_input_hint *hint)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
@@ -335,8 +336,12 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (!skb_valid_dst(skb)) {
-		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-					   iph->tos, dev);
+		if (hint && hint->daddr == iph->daddr && hint->tos == iph->tos)
+			err = ip_route_use_hint(skb, iph->daddr, iph->saddr,
+						iph->tos, dev, hint);
+		else
+			err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+						   iph->tos, dev);
 		if (unlikely(err))
 			goto drop_error;
 	}
@@ -408,7 +413,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (!skb)
 		return NET_RX_SUCCESS;
 
-	ret = ip_rcv_finish_core(net, sk, skb, dev);
+	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
 	if (ret != NET_RX_DROP)
 		ret = dst_input(skb);
 	return ret;
@@ -535,9 +540,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 	}
 }
 
+/* No hints for broadcast routes or when custom ipv4 fib rules are set. */
+static bool ip_can_cache_route_hint(struct net *net, struct rtable *rt)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (net->ipv4.fib_has_custom_rules)
+		return false;
+#endif
+	return rt->rt_type != RTN_BROADCAST;
+}
+
 static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 			       struct list_head *head)
 {
+	struct ip_route_input_hint _hint, *hint = NULL;
 	struct dst_entry *curr_dst = NULL;
 	struct sk_buff *skb, *next;
 	struct list_head sublist;
@@ -554,11 +570,23 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 		skb = l3mdev_ip_rcv(skb);
 		if (!skb)
 			continue;
-		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
+		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
 			continue;
 
 		dst = skb_dst(skb);
 		if (curr_dst != dst) {
+			struct rtable *rt = (struct rtable *)dst;
+
+			if (ip_can_cache_route_hint(net, rt)) {
+				_hint.refdst = skb->_skb_refdst;
+				_hint.daddr = ip_hdr(skb)->daddr;
+				_hint.tos = ip_hdr(skb)->tos;
+				_hint.local = rt->rt_type == RTN_LOCAL;
+				hint = &_hint;
+			} else {
+				hint = NULL;
+			}
+
 			/* dispatch old sublist */
 			if (!list_empty(&sublist))
 				ip_sublist_rcv_finish(&sublist);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dcc4fa10138d..b0ddff17db80 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2019,6 +2019,44 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
 }
 
+/* Runs the saddr checks of ip_route_input_slow() (tos is masked here, daddr
+ * must be valid and not a local broadcast) and attaches the hinted dst
+ * instead of doing a route lookup. Returns 0 or a negative error code.
+ */
+int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		      u8 tos, struct net_device *dev,
+		      struct ip_route_input_hint *hint)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct net *net = dev_net(dev);
+	int err = -EINVAL;
+	u32 itag = 0;
+
+	if (!in_dev)	/* may be NULL; everything below needs it */
+		return err;
+
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
+	    ipv4_is_zeronet(saddr))
+		goto martian_source;
+	if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+		goto martian_source;
+
+	if (hint->local) {
+		err = fib_validate_source(skb, saddr, daddr,
+					  tos & IPTOS_RT_MASK, 0, dev,
+					  in_dev, &itag);
+		if (err < 0)
+			goto martian_source;
+	}
+
+	__skb_dst_copy(skb, hint->refdst);
+	return 0;
+
+martian_source:
+	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
+	return err;
+}
+
 /*
  *	NOTE. We drop all the packets that has local source
  *	addresses, because every properly looped back packet
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ