lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 14 Jan 2014 09:51:34 -0800
From:	Tom Herbert <therbert@...gle.com>
To:	Or Gerlitz <ogerlitz@...lanox.com>
Cc:	David Miller <davem@...emloft.net>,
	Linux Netdev List <netdev@...r.kernel.org>,
	Jerry Chu <hkchu@...gle.com>,
	Eric Dumazet <edumazet@...gle.com>,
	Herbert Xu <herbert@...dor.apana.org.au>,
	Yan Burman <yanb@...lanox.com>,
	Shlomo Pongratz <shlomop@...lanox.com>
Subject: Re: [PATCH net-next V4 1/3] net: Add GRO support for UDP
 encapsulating protocols

On Tue, Jan 14, 2014 at 8:00 AM, Or Gerlitz <ogerlitz@...lanox.com> wrote:
> Add GRO handlers for protocols that do UDP encapsulation, with the intent of
> being able to coalesce packets which encapsulate packets belonging to
> the same TCP session.
>
> For GRO purposes, the destination UDP port takes the role of the ether type
> field in the ethernet header or the next protocol in the IP header.
>
> The UDP GRO handler will only attempt to coalesce packets whose destination
> port is registered to have gro handler.
>
> Use a mark on the skb GRO CB data to disallow (flush) running the udp gro receive
> code twice on a packet. This solves the problem of udp encapsulated packets whose
> inner VM packet is udp and happen to carry a port which has registered offloads.
>
> Signed-off-by: Shlomo Pongratz <shlomop@...lanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@...lanox.com>
> ---
>  include/linux/netdevice.h |   10 +++-
>  include/net/protocol.h    |    3 +
>  net/core/dev.c            |    1 +
>  net/ipv4/udp_offload.c    |  157 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 170 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index a2a70cc..efb942f 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1652,7 +1652,10 @@ struct napi_gro_cb {
>         unsigned long age;
>
>         /* Used in ipv6_gro_receive() */
> -       int     proto;
> +       u16     proto;
> +
> +       /* Used in udp_gro_receive */
> +       u16     udp_mark;
>
>         /* used to support CHECKSUM_COMPLETE for tunneling protocols */
>         __wsum  csum;
> @@ -1691,6 +1694,11 @@ struct packet_offload {
>         struct list_head         list;
>  };
>
> +struct udp_offload {
> +       __be16                   port;
> +       struct offload_callbacks callbacks;
> +};
> +
>  /* often modified stats are per cpu, other are shared (netdev->stats) */
>  struct pcpu_sw_netstats {
>         u64     rx_packets;
> diff --git a/include/net/protocol.h b/include/net/protocol.h
> index 0e5f866..a7e986b 100644
> --- a/include/net/protocol.h
> +++ b/include/net/protocol.h
> @@ -108,6 +108,9 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num);
>  void inet_register_protosw(struct inet_protosw *p);
>  void inet_unregister_protosw(struct inet_protosw *p);
>
> +int  udp_add_offload(struct udp_offload *prot);
> +void udp_del_offload(struct udp_offload *prot);
> +
>  #if IS_ENABLED(CONFIG_IPV6)
>  int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num);
>  int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 87312dc..aafc07a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3858,6 +3858,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>                 NAPI_GRO_CB(skb)->same_flow = 0;
>                 NAPI_GRO_CB(skb)->flush = 0;
>                 NAPI_GRO_CB(skb)->free = 0;
> +               NAPI_GRO_CB(skb)->udp_mark = 0;
>
>                 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
>                 break;
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 79c62bd..11785ac 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -14,6 +14,16 @@
>  #include <net/udp.h>
>  #include <net/protocol.h>
>
> +static DEFINE_SPINLOCK(udp_offload_lock);
> +static struct udp_offload_priv *udp_offload_base __read_mostly;
> +
> +struct udp_offload_priv {
> +       struct udp_offload      *offload;
> +       struct rcu_head         rcu;
> +       atomic_t                refcount;
> +       struct udp_offload_priv __rcu *next;
> +};
> +
>  static int udp4_ufo_send_check(struct sk_buff *skb)
>  {
>         if (!pskb_may_pull(skb, sizeof(struct udphdr)))
> @@ -89,10 +99,157 @@ out:
>         return segs;
>  }
>
> +int udp_add_offload(struct udp_offload *uo)
> +{
> +       struct udp_offload_priv **head = &udp_offload_base;
> +       struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
> +
> +       if (!new_offload)
> +               return -ENOMEM;
> +
> +       new_offload->offload = uo;
> +       atomic_set(&new_offload->refcount, 1);
> +
> +       spin_lock(&udp_offload_lock);
> +       rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
> +       rcu_assign_pointer(*head, rcu_dereference(new_offload));
> +       spin_unlock(&udp_offload_lock);
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL(udp_add_offload);
> +
> +static void udp_offload_free_routine(struct rcu_head *head)
> +{
> +       struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu);
> +       kfree(ou_priv);
> +}
> +
> +static void udp_offload_put(struct udp_offload_priv *uo_priv)
> +{
> +       if (atomic_dec_and_test(&uo_priv->refcount))
> +               call_rcu(&uo_priv->rcu, udp_offload_free_routine);
> +}
> +
> +void udp_del_offload(struct udp_offload *uo)
> +{
> +       struct udp_offload_priv __rcu **head = &udp_offload_base;
> +       struct udp_offload_priv *uo_priv;
> +
> +       spin_lock(&udp_offload_lock);
> +
> +       uo_priv = rcu_dereference(*head);
> +       for (; uo_priv != NULL;
> +               uo_priv = rcu_dereference(*head)) {
> +
> +               if (uo_priv->offload == uo) {
> +                       rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
> +                       udp_offload_put(uo_priv);
> +                       goto unlock;
> +               }
> +               head = &uo_priv->next;
> +       }
> +       pr_warn("udp_del_offload: didn't find offload for port %d\n", htons(uo->port));
> +unlock:
> +       spin_unlock(&udp_offload_lock);
> +}
> +EXPORT_SYMBOL(udp_del_offload);
> +
> +static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
> +{
> +       struct udp_offload_priv *uo_priv;
> +       struct sk_buff *p, **pp = NULL;
> +       struct udphdr *uh, *uh2;
> +       unsigned int hlen, off;
> +       int flush = 1;
> +
> +       if (NAPI_GRO_CB(skb)->udp_mark ||
> +           (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
> +               goto out;
> +
> +       /* mark that this skb passed once through the udp gro layer */
> +       NAPI_GRO_CB(skb)->udp_mark = 1;
> +
> +       off  = skb_gro_offset(skb);
> +       hlen = off + sizeof(*uh);
> +       uh   = skb_gro_header_fast(skb, off);
> +       if (skb_gro_header_hard(skb, hlen)) {
> +               uh = skb_gro_header_slow(skb, hlen, off);
> +               if (unlikely(!uh))
> +                       goto out;
> +       }
> +
> +       rcu_read_lock();
> +       uo_priv = rcu_dereference(udp_offload_base);
> +       for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
> +               if (uo_priv->offload->port == uh->dest &&
> +                   uo_priv->offload->callbacks.gro_receive) {
> +                       atomic_inc(&uo_priv->refcount);
> +                       goto unflush;
> +               }
> +       }
> +       rcu_read_unlock();
> +       goto out;
> +
> +unflush:
> +       rcu_read_unlock();
> +       flush = 0;
> +
> +       for (p = *head; p; p = p->next) {
> +               if (!NAPI_GRO_CB(p)->same_flow)
> +                       continue;
> +
> +               uh2 = (struct udphdr   *)(p->data + off);
> +               if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
> +                       NAPI_GRO_CB(p)->same_flow = 0;
> +                       continue;
> +               }
> +       }
> +
> +       skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
> +       pp = uo_priv->offload->callbacks.gro_receive(head, skb);
> +       udp_offload_put(uo_priv);
> +
> +out:
> +       NAPI_GRO_CB(skb)->flush |= flush;
> +       return pp;
> +}
> +
> +static int udp_gro_complete(struct sk_buff *skb, int nhoff)
> +{
> +       struct udp_offload_priv *uo_priv;
> +       __be16 newlen = htons(skb->len - nhoff);
> +       struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
> +       int err = -ENOSYS;
> +
> +       uh->len = newlen;
> +
> +       rcu_read_lock();
> +
> +       uo_priv = rcu_dereference(udp_offload_base);
> +       for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
> +               if (uo_priv->offload->port == uh->dest &&
> +                   uo_priv->offload->callbacks.gro_complete)
> +                       goto found;
> +       }
> +
> +       rcu_read_unlock();
> +       return err;
> +
> +found:
> +       atomic_inc(&uo_priv->refcount);

This atomic_inc() is an expensive operation in the critical path. Can
uo_priv be protected by RCU as well?

> +       rcu_read_unlock();
> +       err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
> +       udp_offload_put(uo_priv);
> +       return err;
> +}
> +
>  static const struct net_offload udpv4_offload = {
>         .callbacks = {
>                 .gso_send_check = udp4_ufo_send_check,
>                 .gso_segment = udp4_ufo_fragment,
> +               .gro_receive  = udp_gro_receive,
> +               .gro_complete = udp_gro_complete,
>         },
>  };
>
> --
> 1.7.1
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ