[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181023121000.GP3823@gauss3.secunet.de>
Date: Tue, 23 Oct 2018 14:10:00 +0200
From: Steffen Klassert <steffen.klassert@...unet.com>
To: Paolo Abeni <pabeni@...hat.com>
CC: <netdev@...r.kernel.org>, Willem de Bruijn <willemb@...gle.com>
Subject: Re: [RFC PATCH v2 00/10] udp: implement GRO support
On Fri, Oct 19, 2018 at 04:25:10PM +0200, Paolo Abeni wrote:
> This series implements GRO support for UDP sockets, as the RX counterpart
> of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT").
> The core functionality is implemented by the second patch, introducing a new
> sockopt to enable UDP_GRO, while patch 3 implements support for passing the
> segment size to the user space via a new cmsg.
> UDP GRO performs a socket lookup for each ingress packets and aggregate datagram
> directed to UDP GRO enabled sockets with constant l4 tuple.
>
> UDP GRO packets can land on non GRO-enabled sockets, e.g. due to iptables NAT
> rules, and that could potentially confuse existing applications.
>
> The solution adopted here is to de-segment the GRO packet before enqueuing
> as needed. Since we must cope with packet reinsertion after de-segmentation,
> the relevant code is factored-out in ipv4 and ipv6 specific helpers and exposed
> to UDP usage.
>
> While the current code can probably be improved, this safeguard ,implemented in
> the patches 4-7, allows future enachements to enable UDP GSO offload on more
> virtual devices eventually even on forwarded packets.
I was curious what I get with this when doing packet forwarding.
So I added forwarding support with the patch below (on top of
your patchset). While the packet processing could work the way
I do it in this patch, I'm not sure about the way I enable
UDP GRO on forwarding. Maybe there it a better way than just
do UDP GRO if forwarding is enabled on the receiving device.
Some quick benchmark numbers with UDP packet forwarding
(1460 byte packets) through two gateways:
net-next: 16.4 Gbps
net-next + UDP GRO: 20.3 Gbps
Subject: [PATCH RFC] udp: Allow gro for the forwarding path.
This patch adds a early route lookup to inet_gro_receive()
in case forwarding is enabled on the receiving device.
To be forwarded packets are allowed to enter the UDP
GRO handlers then.
Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
---
include/linux/netdevice.h | 2 +-
include/net/dst.h | 1 +
net/core/dev.c | 6 ++++--
net/ipv4/af_inet.c | 12 ++++++++++++
net/ipv4/route.c | 1 +
net/ipv4/udp_offload.c | 11 +++++++++--
6 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dc1d9ed33b31..2eb3fa960ad4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2304,7 +2304,7 @@ struct napi_gro_cb {
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;
- /* 1 bit hole */
+ u8 is_fwd:1;
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
diff --git a/include/net/dst.h b/include/net/dst.h
index 6cf0870414c7..01bdf825a6f9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -54,6 +54,7 @@ struct dst_entry {
#define DST_XFRM_TUNNEL 0x0020
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080
+#define DST_FWD 0x0100
/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
diff --git a/net/core/dev.c b/net/core/dev.c
index 022ad73d6253..c6aaffb99456 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5387,8 +5387,10 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
- diffs |= skb_metadata_dst_cmp(p, skb);
- diffs |= skb_metadata_differs(p, skb);
+ if (!NAPI_GRO_CB(p)->is_fwd) {
+ diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
+ }
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..6e4e8588c0b1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1479,8 +1479,20 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
NAPI_GRO_CB(p)->flush_id = flush_id;
else
NAPI_GRO_CB(p)->flush_id |= flush_id;
+
+ NAPI_GRO_CB(skb)->is_fwd = NAPI_GRO_CB(p)->is_fwd;
+
+ goto found;
}
+ if (IN_DEV_FORWARD(__in_dev_get_rcu(skb->dev))) {
+ if (!ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos,
+ skb->dev)) {
+ if ((skb_dst(skb)->flags & DST_FWD))
+ NAPI_GRO_CB(skb)->is_fwd = 1;
+ }
+ }
+found:
NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
NAPI_GRO_CB(skb)->flush |= flush;
skb_set_network_header(skb, off);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c0a9d26c06ce..3e5b3f0be0f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1757,6 +1757,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_is_input = 1;
RT_CACHE_STAT_INC(in_slow_tot);
+ rth->dst.flags |= DST_FWD;
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d93c1e8097ba..c99f117bc44e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -402,6 +402,12 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct sock *sk;
rcu_read_lock();
+ if (NAPI_GRO_CB(skb)->is_fwd) {
+ pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+ rcu_read_unlock();
+ return pp;
+ }
+
sk = (*lookup)(skb, uh->source, uh->dest);
if (!sk)
goto out_unlock;
@@ -456,7 +462,8 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
- if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+ if (unlikely(!uh) || (!static_branch_unlikely(&udp_encap_needed_key) &&
+ !NAPI_GRO_CB(skb)->is_fwd))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -503,7 +510,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
rcu_read_lock();
sk = (*lookup)(skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_enabled) {
+ if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_fwd) {
err = udp_gro_complete_segment(skb);
} else if (sk && udp_sk(sk)->gro_complete) {
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
--
2.17.1
Powered by blists - more mailing lists