[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+mtBx-fUe_VbUUnjEVXeJO=87yypoDuMfoctO0Q3nMx+6UZFA@mail.gmail.com>
Date: Tue, 7 Jan 2014 10:08:05 -0800
From: Tom Herbert <therbert@...gle.com>
To: Or Gerlitz <ogerlitz@...lanox.com>
Cc: Jerry Chu <hkchu@...gle.com>, Eric Dumazet <edumazet@...gle.com>,
Herbert Xu <herbert@...dor.apana.org.au>,
Linux Netdev List <netdev@...r.kernel.org>,
David Miller <davem@...emloft.net>,
Yan Burman <yanb@...lanox.com>,
Shlomo Pongratz <shlomop@...lanox.com>
Subject: Re: [PATCH net-next V2 3/3] net: Add GRO support for vxlan traffic
On Tue, Jan 7, 2014 at 7:29 AM, Or Gerlitz <ogerlitz@...lanox.com> wrote:
> Add gro handlers for vxlan using the udp gro infrastructure
>
> On my setup, which is net-next (now with the mlx4 vxlan offloads patches),
> a single TCP session that goes through vxlan tunneling shows a nice
> improvement, from 6.8 Gb/s to 11.5 Gb/s.
>
> --> UDP/VXLAN GRO disabled
> $ netperf -H 192.168.52.147 -c -C
>
> $ netperf -t TCP_STREAM -H 192.168.52.147 -c -C
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET
> Recv Send Send Utilization Service Demand
> Socket Socket Message Elapsed Send Recv Send Recv
> Size Size Size Time Throughput local remote local remote
> bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB
>
> 87380 65536 65536 10.00 6799.75 12.54 24.79 0.604 1.195
>
> --> UDP/VXLAN GRO enabled
>
> $ netperf -t TCP_STREAM -H 192.168.52.147 -c -C
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET
> Recv Send Send Utilization Service Demand
> Socket Socket Message Elapsed Send Recv Send Recv
> Size Size Size Time Throughput local remote local remote
> bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB
>
> 87380 65536 65536 10.00 11562.72 24.90 20.34 0.706 0.577
>
> Signed-off-by: Or Gerlitz <ogerlitz@...lanox.com>
> ---
> drivers/net/vxlan.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 99 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 481f85d..b51823b 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -40,6 +40,7 @@
> #include <net/net_namespace.h>
> #include <net/netns/generic.h>
> #include <net/vxlan.h>
> +#include <net/protocol.h>
> #if IS_ENABLED(CONFIG_IPV6)
> #include <net/ipv6.h>
> #include <net/addrconf.h>
> @@ -554,6 +555,98 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
> return 1;
> }
>
> +static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb)
> +{
> + struct sk_buff *p, **pp = NULL;
> + struct vxlanhdr *vh, *vh2;
> + struct ethhdr *eh;
> + unsigned int hlen, off, off_eth;
> + const struct packet_offload *ptype;
> + __be16 type;
> + int flush = 1;
> +
> + off = skb_gro_offset(skb);
> + hlen = off + sizeof(*vh);
> + vh = skb_gro_header_fast(skb, off);
> + if (skb_gro_header_hard(skb, hlen)) {
> + vh = skb_gro_header_slow(skb, hlen, off);
> + if (unlikely(!vh))
> + goto out;
> + }
> +
> + flush = 0;
> +
> + for (p = *head; p; p = p->next) {
> + if (!NAPI_GRO_CB(p)->same_flow)
> + continue;
> +
> + vh2 = (struct vxlanhdr *)(p->data + off);
> + if (vh->vx_vni ^ vh2->vx_vni) {
Why ^ instead of != ?
> + NAPI_GRO_CB(p)->same_flow = 0;
> + continue;
> + }
> + goto found;
> + }
> +
> +found:
> + skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
> +
> + off_eth = skb_gro_offset(skb);
> + hlen = off_eth + sizeof(*eh);
> + eh = skb_gro_header_fast(skb, off_eth);
> + if (skb_gro_header_hard(skb, hlen)) {
> + eh = skb_gro_header_slow(skb, hlen, off_eth);
> + if (unlikely(!eh))
> + goto out;
> + }
> + type = eh->h_proto;
> +
> + rcu_read_lock();
> + ptype = gro_find_receive_by_type(type);
> + if (ptype == NULL) {
> + flush = 1;
> + goto out_unlock;
> + }
> +
> + skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
> + pp = ptype->callbacks.gro_receive(head, skb);
> +
> +out_unlock:
> + rcu_read_unlock();
> +out:
> + NAPI_GRO_CB(skb)->flush |= flush;
> +
> + return pp;
> +}
> +
> +static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
> +{
> + struct ethhdr *eh;
> + struct packet_offload *ptype;
> + __be16 type;
> + /* 22 = 8 bytes for the vxlan header + 14 bytes for the inner eth header */
> + int vxlan_len = 22;
> + int err = -ENOSYS;
> +
> + eh = (struct ethhdr *)(skb->data + nhoff + sizeof (struct vxlanhdr));
> + type = eh->h_proto;
> +
> + rcu_read_lock();
> + ptype = gro_find_complete_by_type(type);
> + if (ptype != NULL)
> + err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len);
> +
> + rcu_read_unlock();
> + return err;
> +}
> +
> +static const struct net_offload vxlan_offload = {
> + .callbacks = {
> + .gro_receive = vxlan_gro_receive,
> + .gro_complete = vxlan_gro_complete,
> + },
> +};
> +
> /* Notify netdevs that UDP port started listening */
> static void vxlan_notify_add_rx_port(struct sock *sk)
> {
> @@ -568,6 +661,8 @@ static void vxlan_notify_add_rx_port(struct sock *sk)
> dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
> port);
> }
> + if (sa_family == AF_INET)
> + udp_add_offload(&vxlan_offload, port);
> rcu_read_unlock();
> }
>
> @@ -585,6 +680,8 @@ static void vxlan_notify_del_rx_port(struct sock *sk)
> dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family,
> port);
> }
> + if (sa_family == AF_INET)
> + udp_del_offload(&vxlan_offload, port);
> rcu_read_unlock();
> }
>
> @@ -1125,8 +1222,8 @@ static void vxlan_rcv(struct vxlan_sock *vs,
> * leave the CHECKSUM_UNNECESSARY, the device checksummed it
> * for us. Otherwise force the upper layers to verify it.
> */
> - if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation ||
> - !(vxlan->dev->features & NETIF_F_RXCSUM))
> + if ((skb->ip_summed != CHECKSUM_UNNECESSARY && skb->ip_summed != CHECKSUM_PARTIAL) ||
> + !skb->encapsulation || !(vxlan->dev->features & NETIF_F_RXCSUM))
> skb->ip_summed = CHECKSUM_NONE;
>
> skb->encapsulation = 0;
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists