[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <fc84377af8867930138e05c27952d4d4@imap.linux.ibm.com>
Date: Mon, 12 Oct 2015 15:26:24 -0700
From: Ramu Ramamurthy <sramamur@...ux.vnet.ibm.com>
To: Tom Herbert <tom@...bertland.com>,
Eric Dumazet <eric.dumazet@...il.com>,
Jesse Gross <jesse@...ira.com>, hkchu@...gle.com
Cc: Netdev <netdev@...r.kernel.org>, Davem <davem@...emloft.net>,
jkidambi@...ibm.com, manand@...ibm.com
Subject: [PATCH] - gre: add eth_p_teb gro-handler for OVS with gre tunnels
Problem:
--------
When using OVS with GRE tunnels and GRO enabled on the NIC,
we find that GRO does not actually take effect. As a result, TCP stream
performance on a 10G NIC is only around 2-3 Gbps.
Root Cause:
-----------
The protocol field set in GRE (by OVS) is ETH_P_TEB.
The code in gre_gro_receive() (gre_offload.c) calls
gro_find_receive_by_type() to determine a gro handler for the
ETH_P_TEB protocol. However, no such protocol is registered
at the device layer (only ETH_P_IP, ETH_P_IPV6, and mpls related
protocols are registered). Hence, GRO is skipped.
Fix:
----
Add a GRO handler at the device layer for the ETH_P_TEB protocol. It is
implemented as an independent module, so it can be added if needed.
Measurements:
Single TCP stream performance
Before: 2.4 Gbps
After: 7.1 Gbps
Signed-off-by: Ramu Ramamurthy <ramu.ramamurthy@...ibm.com>
---
net/ipv4/Makefile | 1 +
net/ipv4/eth_teb_offload.c | 114 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+), 0 deletions(-)
create mode 100644 net/ipv4/eth_teb_offload.c
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 89aacb6..8b2d7ee 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
+obj-$(CONFIG_NET_IPGRE_DEMUX) += eth_teb_offload.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
obj-$(CONFIG_NET_IPVTI) += ip_vti.o
diff --git a/net/ipv4/eth_teb_offload.c b/net/ipv4/eth_teb_offload.c
new file mode 100644
index 0000000..fc4aabd
--- /dev/null
+++ b/net/ipv4/eth_teb_offload.c
@@ -0,0 +1,114 @@
+/*
+ * This module performs GRO for the Transparent Ethernet Bridging
+ * protocol encapsulated within GRE. The use case is to boost the
+ * performance of Open vSwitch with GRE tunnels.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <linux/etherdevice.h>
+
+/*
+ * eth_teb_gro_receive - GRO receive hook for an Ethernet frame carried as
+ * ETH_P_TEB payload.
+ * @head: list of packets already queued for GRO
+ * @skb:  incoming packet; its current GRO offset is treated as the start of
+ *        the Ethernet header
+ *
+ * Clears same_flow on every queued packet whose Ethernet header differs from
+ * the one in @skb, then steps over the Ethernet header and passes @skb to the
+ * gro_receive callback registered for the frame's h_proto.
+ *
+ * Returns the value of the next-level gro_receive callback, or NULL with the
+ * flush bit set on @skb when the header cannot be obtained or no handler is
+ * registered for the inner protocol.
+ */
+static struct sk_buff **eth_teb_gro_receive(struct sk_buff **head,
+					    struct sk_buff *skb)
+{
+	const unsigned int eth_off = skb_gro_offset(skb);
+	const unsigned int eth_end = eth_off + sizeof(struct ethhdr);
+	const struct packet_offload *inner;
+	struct sk_buff **result = NULL;
+	struct sk_buff *held;
+	struct ethhdr *hdr;
+	__be16 proto;
+	int flush = 1;	/* stays set unless an inner handler takes the packet */
+
+	/* Try the fast header accessor first; fall back to the slow one. */
+	hdr = skb_gro_header_fast(skb, eth_off);
+	if (skb_gro_header_hard(skb, eth_end)) {
+		hdr = skb_gro_header_slow(skb, eth_end, eth_off);
+		if (unlikely(!hdr))
+			goto out;
+	}
+
+	/* A differing Ethernet header means the packet is not the same flow. */
+	for (held = *head; held; held = held->next) {
+		struct ethhdr *held_hdr;
+
+		held_hdr = (struct ethhdr *)(held->data + eth_off);
+		if (NAPI_GRO_CB(held)->same_flow &&
+		    compare_ether_header(hdr, held_hdr))
+			NAPI_GRO_CB(held)->same_flow = 0;
+	}
+
+	proto = hdr->h_proto;
+
+	rcu_read_lock();
+	inner = gro_find_receive_by_type(proto);
+	if (inner) {
+		flush = 0;
+		skb_gro_pull(skb, sizeof(*hdr));
+		skb_gro_postpull_rcsum(skb, hdr, sizeof(*hdr));
+		result = inner->callbacks.gro_receive(head, skb);
+	}
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return result;
+}
+
+/*
+ * eth_teb_gro_complete - GRO complete hook for an Ethernet frame carried as
+ * ETH_P_TEB payload.
+ * @skb:   packet being completed
+ * @nhoff: offset of the Ethernet header from skb->data
+ *
+ * Looks up the gro_complete callback registered for the frame's h_proto and
+ * invokes it with the offset advanced past the Ethernet header.
+ *
+ * Returns the callback's result, or -ENOSYS when no handler is registered.
+ */
+static int eth_teb_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	const struct ethhdr *hdr = (struct ethhdr *)(skb->data + nhoff);
+	const __be16 proto = hdr->h_proto;
+	struct packet_offload *inner;
+	int ret;
+
+	rcu_read_lock();
+	inner = gro_find_complete_by_type(proto);
+	if (inner)
+		ret = inner->callbacks.gro_complete(skb,
+				nhoff + (int)sizeof(struct ethhdr));
+	else
+		ret = -ENOSYS;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* Packet offload entry binding ETH_P_TEB to the eth_teb GRO callbacks. */
+static struct packet_offload ethteb_offload __read_mostly = {
+	.type			= cpu_to_be16(ETH_P_TEB),
+	.callbacks.gro_receive	= eth_teb_gro_receive,
+	.callbacks.gro_complete	= eth_teb_gro_complete,
+};
+
+/*
+ * Module init: log a message and register ethteb_offload with
+ * dev_add_offload(). Always returns 0.
+ */
+static int __init eth_teb_offload_init(void)
+{
+	pr_info("Transparent Ethernet Bridging offload register\n");
+	/* "&eth" had been mangled into a single "ð" character; restored. */
+	dev_add_offload(&ethteb_offload);
+	return 0;
+}
+
+/*
+ * Module exit: unregister ethteb_offload with dev_remove_offload() and log
+ * a message.
+ */
+static void __exit eth_teb_offload_exit(void)
+{
+	/* "&eth" had been mangled into a single "ð" character; restored. */
+	dev_remove_offload(&ethteb_offload);
+	pr_info("Transparent Ethernet Bridging offload deregister\n");
+}
+
+/* Module entry/exit hooks followed by module metadata. */
+module_init(eth_teb_offload_init);
+module_exit(eth_teb_offload_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ramu Ramamurthy (ramu.ramamurthy@...ibm.com)");
+MODULE_DESCRIPTION("Offload for Transparent Ethernet Bridging");
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists