lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181023121000.GP3823@gauss3.secunet.de>
Date:   Tue, 23 Oct 2018 14:10:00 +0200
From:   Steffen Klassert <steffen.klassert@...unet.com>
To:     Paolo Abeni <pabeni@...hat.com>
CC:     <netdev@...r.kernel.org>, Willem de Bruijn <willemb@...gle.com>
Subject: Re: [RFC PATCH v2 00/10] udp: implement GRO support

On Fri, Oct 19, 2018 at 04:25:10PM +0200, Paolo Abeni wrote:
> This series implements GRO support for UDP sockets, as the RX counterpart
> of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT").
> The core functionality is implemented by the second patch, introducing a new
> sockopt to enable UDP_GRO, while patch 3 implements support for passing the
> segment size to the user space via a new cmsg.
> UDP GRO performs a socket lookup for each ingress packets and aggregate datagram
> directed to UDP GRO enabled sockets with constant l4 tuple.
> 
> UDP GRO packets can land on non GRO-enabled sockets, e.g. due to iptables NAT
> rules, and that could potentially confuse existing applications.
> 
> The solution adopted here is to de-segment the GRO packet before enqueuing
> as needed. Since we must cope with packet reinsertion after de-segmentation,
> the relevant code is factored-out in ipv4 and ipv6 specific helpers and exposed
> to UDP usage.
> 
> While the current code can probably be improved, this safeguard ,implemented in
> the patches 4-7, allows future enachements to enable UDP GSO offload on more
> virtual devices eventually even on forwarded packets.

I was curious what I get with this when doing packet forwarding.
So I added forwarding support with the patch below (on top of
your patchset). While the packet processing could work the way
I do it in this patch, I'm not sure about the way I enable
UDP GRO on forwarding. Maybe there it a better way than just
do UDP GRO if forwarding is enabled on the receiving device.

Some quick benchmark numbers with UDP packet forwarding
(1460 byte packets) through two gateways:

net-next: 16.4 Gbps

net-next + UDP GRO: 20.3 Gbps

Subject: [PATCH RFC] udp: Allow gro for the forwarding path.

This patch adds a early route lookup to inet_gro_receive()
in case forwarding is enabled on the receiving device.
To be forwarded packets are allowed to enter the UDP
GRO handlers then.

Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
---
 include/linux/netdevice.h |  2 +-
 include/net/dst.h         |  1 +
 net/core/dev.c            |  6 ++++--
 net/ipv4/af_inet.c        | 12 ++++++++++++
 net/ipv4/route.c          |  1 +
 net/ipv4/udp_offload.c    | 11 +++++++++--
 6 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dc1d9ed33b31..2eb3fa960ad4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2304,7 +2304,7 @@ struct napi_gro_cb {
 	/* Number of gro_receive callbacks this packet already went through */
 	u8 recursion_counter:4;
 
-	/* 1 bit hole */
+	u8	is_fwd:1;
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
diff --git a/include/net/dst.h b/include/net/dst.h
index 6cf0870414c7..01bdf825a6f9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -54,6 +54,7 @@ struct dst_entry {
 #define DST_XFRM_TUNNEL		0x0020
 #define DST_XFRM_QUEUE		0x0040
 #define DST_METADATA		0x0080
+#define DST_FWD			0x0100
 
 	/* A non-zero value of dst->obsolete forces by-hand validation
 	 * of the route entry.  Positive values are set by the generic
diff --git a/net/core/dev.c b/net/core/dev.c
index 022ad73d6253..c6aaffb99456 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5387,8 +5387,10 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		diffs |= skb_metadata_dst_cmp(p, skb);
-		diffs |= skb_metadata_differs(p, skb);
+		if (!NAPI_GRO_CB(p)->is_fwd) {
+			diffs |= skb_metadata_dst_cmp(p, skb);
+			diffs |= skb_metadata_differs(p, skb);
+		}
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
 						      skb_mac_header(skb));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..6e4e8588c0b1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1479,8 +1479,20 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 			NAPI_GRO_CB(p)->flush_id = flush_id;
 		else
 			NAPI_GRO_CB(p)->flush_id |= flush_id;
+
+		NAPI_GRO_CB(skb)->is_fwd = NAPI_GRO_CB(p)->is_fwd;
+
+		goto found;
 	}
 
+	if (IN_DEV_FORWARD(__in_dev_get_rcu(skb->dev))) {
+		if (!ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos,
+					  skb->dev)) {
+			if ((skb_dst(skb)->flags & DST_FWD))
+				NAPI_GRO_CB(skb)->is_fwd = 1;
+		}
+	}
+found:
 	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
 	NAPI_GRO_CB(skb)->flush |= flush;
 	skb_set_network_header(skb, off);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c0a9d26c06ce..3e5b3f0be0f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1757,6 +1757,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_is_input = 1;
 	RT_CACHE_STAT_INC(in_slow_tot);
 
+	rth->dst.flags |= DST_FWD;
 	rth->dst.input = ip_forward;
 
 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d93c1e8097ba..c99f117bc44e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -402,6 +402,12 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	struct sock *sk;
 
 	rcu_read_lock();
+	if (NAPI_GRO_CB(skb)->is_fwd) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	sk = (*lookup)(skb, uh->source, uh->dest);
 	if (!sk)
 		goto out_unlock;
@@ -456,7 +462,8 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-	if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+	if (unlikely(!uh) || (!static_branch_unlikely(&udp_encap_needed_key) &&
+	    !NAPI_GRO_CB(skb)->is_fwd))
 		goto flush;
 
 	/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -503,7 +510,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	rcu_read_lock();
 	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_enabled) {
+	if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_fwd) {
 		err = udp_gro_complete_segment(skb);
 	} else if (sk && udp_sk(sk)->gro_complete) {
 		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ