[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <3fa3822651e29b8484d598b10ae61b0efde6b14f.1539957909.git.pabeni@redhat.com>
Date: Fri, 19 Oct 2018 16:25:12 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: Willem de Bruijn <willemb@...gle.com>,
Steffen Klassert <steffen.klassert@...unet.com>
Subject: [RFC PATCH v2 02/10] udp: implement GRO for plain UDP sockets.
This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_GRO is enabled, such socket is also
eligible for GRO in the rx path: UDP segments directed to such socket
are assembled into a larger GSO_UDP_L4 packet.
The core UDP GRO support is enabled with setsockopt(UDP_GRO).
Initial benchmark numbers:
Before:
udp rx: 1079 MB/s 769065 calls/s
After:
udp rx: 1466 MB/s 24877 calls/s
This change introduces a side effect in respect to UDP tunnels:
after a UDP tunnel creation, now the kernel performs a lookup per ingress
UDP packet, while before such lookup happened only if the ingress packet
carried a valid internal header csum.
v1 -> v2:
- use a new option to enable UDP GRO
- use static keys to protect the UDP GRO socket lookup
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
include/linux/udp.h | 3 +-
include/uapi/linux/udp.h | 1 +
net/ipv4/udp.c | 7 +++
net/ipv4/udp_offload.c | 109 +++++++++++++++++++++++++++++++--------
net/ipv6/udp_offload.c | 6 +--
5 files changed, 98 insertions(+), 28 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index a4dafff407fb..f613b329852e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -50,11 +50,12 @@ struct udp_sock {
__u8 encap_type; /* Is this an Encapsulation socket? */
unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
- encap_enabled:1; /* This socket enabled encap
+ encap_enabled:1, /* This socket enabled encap
* processing; UDP tunnels and
* different encapsulation layer set
* this
*/
+ gro_enabled:1; /* Can accept GRO packets */
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 09502de447f5..30baccb6c9c4 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -33,6 +33,7 @@ struct udphdr {
#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */
#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */
#define UDP_SEGMENT 103 /* Set GSO segmentation size */
+#define UDP_GRO 104 /* This socket can receive UDP GRO packets */
/* UDP encapsulation types */
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9fcb5374e166..3c277378814f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -115,6 +115,7 @@
#include "udp_impl.h"
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
+#include <net/udp_tunnel.h>
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -2459,6 +2460,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->gso_size = val;
break;
+ case UDP_GRO:
+ if (valbool)
+ udp_tunnel_encap_enable(sk->sk_socket);
+ up->gro_enabled = valbool;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 802f2bc00d69..d93c1e8097ba 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,6 +343,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
return segs;
}
+#define UDO_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+ struct sk_buff *pp = NULL;
+ struct udphdr *uh2;
+ struct sk_buff *p;
+
+ /* requires non zero csum, for simmetry with GSO */
+ if (!uh->check) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ /* pull encapsulating udp header */
+ skb_gro_pull(skb, sizeof(struct udphdr));
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ uh2 = udp_hdr(p);
+
+ /* Match ports only, as csum is always non zero */
+ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Terminate the flow on len mismatch or if it grow "too much".
+ * Under small packet flood GRO count could elsewhere grow a lot
+ * leading to execessive truesize values
+ */
+ if (!skb_gro_receive(p, skb) &&
+ NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+ pp = p;
+ else if (uh->len != uh2->len)
+ pp = p;
+
+ return pp;
+ }
+
+ /* mismatch, but we never need to flush */
+ return NULL;
+}
+
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup)
{
@@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
int flush = 1;
struct sock *sk;
+ rcu_read_lock();
+ sk = (*lookup)(skb, uh->source, uh->dest);
+ if (!sk)
+ goto out_unlock;
+
+ if (udp_sk(sk)->gro_enabled) {
+ pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+ rcu_read_unlock();
+ return pp;
+ }
+
if (NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
- !NAPI_GRO_CB(skb)->csum_valid))
- goto out;
+ !NAPI_GRO_CB(skb)->csum_valid) ||
+ !udp_sk(sk)->gro_receive)
+ goto out_unlock;
/* mark that this skb passed once through the tunnel gro layer */
NAPI_GRO_CB(skb)->encap_mark = 1;
- rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
-
- if (sk && udp_sk(sk)->gro_receive)
- goto unflush;
- goto out_unlock;
-
-unflush:
flush = 0;
list_for_each_entry(p, head, list) {
@@ -394,7 +446,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
out_unlock:
rcu_read_unlock();
-out:
skb_gro_flush_final(skb, pp, flush);
return pp;
}
@@ -427,6 +478,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
return NULL;
}
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+ return 0;
+}
+
int udp_gro_complete(struct sk_buff *skb, int nhoff,
udp_lookup_t lookup)
{
@@ -437,16 +501,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
- /* Set encapsulation before calling into inner gro_complete() functions
- * to make them set up the inner offsets.
- */
- skb->encapsulation = 1;
-
rcu_read_lock();
sk = (*lookup)(skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_complete)
+ if (sk && udp_sk(sk)->gro_enabled) {
+ err = udp_gro_complete_segment(skb);
+ } else if (sk && udp_sk(sk)->gro_complete) {
+ skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+ : SKB_GSO_UDP_TUNNEL;
+
+ /* Set encapsulation before calling into inner gro_complete()
+ * functions to make them set up the inner offsets.
+ */
+ skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
if (skb->remcsum_offload)
@@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1b8e161ac527..828b2457f97b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
--
2.17.2
Powered by blists - more mailing lists