Message-ID: <CALx6S35wK2_eR1e5OTHnQTkQRg763unS0sCMQU+Jk6e27F26Vw@mail.gmail.com>
Date: Wed, 23 Mar 2016 10:09:47 -0700
From: Tom Herbert <tom@...bertland.com>
To: Alexander Duyck <aduyck@...antis.com>
Cc: Edward Cree <ecree@...arflare.com>,
Linux Kernel Network Developers <netdev@...r.kernel.org>,
"David S. Miller" <davem@...emloft.net>,
Alexander Duyck <alexander.duyck@...il.com>
Subject: Re: [RFC PATCH 7/9] GSO: Support partial segmentation offload
On Fri, Mar 18, 2016 at 4:25 PM, Alexander Duyck <aduyck@...antis.com> wrote:
> This patch adds support for something I am referring to as GSO partial.
> The basic idea is that we can support a broader range of devices for
> segmentation if we use fixed outer headers and have the hardware only
> really deal with segmenting the inner header. The name reflects the
> fact that everything before csum_start will be fixed headers, and
> everything after will be the region that is handled by hardware.
>
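To make the mechanics concrete before commenting: the trimming step in
the skb_segment()/tcp_gso_segment() hunks below just rounds the payload
down to an MSS multiple, so the device only has to replicate the fixed
headers and split the inner payload. A minimal userspace sketch, with
illustrative numbers only (nothing here is from the patch):

    #include <stdio.h>

    /* Round a GSO super-frame's payload down to an MSS multiple;
     * the remainder, if any, becomes a shorter trailing segment
     * that is finished in software.
     */
    int main(void)
    {
            unsigned int len = 64000;   /* inner payload bytes */
            unsigned int mss = 1448;    /* inner TCP MSS */
            unsigned int partial_segs = len / mss;
            unsigned int trimmed = mss * partial_segs;

            printf("%u segments of %u bytes, %u byte remainder\n",
                   partial_segs, mss, len - trimmed);
            return 0;
    }
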
Personally, I don't like the name ;-) This technique should be
"generic" enough to handle almost all GSO, except those cases where
headers are not fixed, which we should be able to avoid as a design
point in any new encapsulations. Besides, what if someday we perform
GSO on something where csum_start is not set?

Can you add some description of the strategy for dealing with ip_id?
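
For context, if the outer IPv4 header really is fixed, every
hardware-produced segment would carry the same ID. I'd expect a
software-side fixup along these lines (purely a sketch mirroring the
inet_gso_segment() hunk below; 'segs' and 'id' are assumed locals):

    struct sk_buff *seg;

    for (seg = segs; seg; seg = seg->next) {
            struct iphdr *iph = ip_hdr(seg);

            iph->id = htons(id++);  /* sequential IDs in software */
            ip_send_check(iph);     /* header changed; refresh csum */
    }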
Thanks,
Tom
> The current implementation allows us to add support for the
> following GSO types with an inner TSO or TSO6 offload:
> NETIF_F_GSO_GRE
> NETIF_F_GSO_GRE_CSUM
> NETIF_F_GSO_UDP_TUNNEL
> NETIF_F_GSO_UDP_TUNNEL_CSUM
>
> Signed-off-by: Alexander Duyck <aduyck@...antis.com>
> ---
> include/linux/netdev_features.h | 7 ++++++-
> include/linux/netdevice.h | 2 ++
> include/linux/skbuff.h | 7 ++++++-
> net/core/dev.c | 31 ++++++++++++++++++++++++++++++-
> net/core/ethtool.c | 1 +
> net/core/skbuff.c | 21 ++++++++++++++++++++-
> net/ipv4/af_inet.c | 12 ++++++++++--
> net/ipv4/gre_offload.c | 23 +++++++++++++++++++----
> net/ipv4/tcp_offload.c | 10 ++++++++--
> net/ipv4/udp_offload.c | 20 ++++++++++++++++----
> net/ipv6/ip6_offload.c | 9 ++++++++-
> 11 files changed, 126 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
> index a734bf43d190..8df3c5553af0 100644
> --- a/include/linux/netdev_features.h
> +++ b/include/linux/netdev_features.h
> @@ -48,8 +48,12 @@ enum {
> NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */
> NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
> NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
> + NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4
> + * in hardware and all other
> + * headers in software.
> + */
> /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
> - NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
> + NETIF_F_GSO_PARTIAL_BIT,
>
> NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
> NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
> @@ -121,6 +125,7 @@ enum {
> #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL)
> #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
> #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
> +#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
> #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
> #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
> #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 31474d9d8a96..427d748ad8f9 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1647,6 +1647,7 @@ struct net_device {
> netdev_features_t vlan_features;
> netdev_features_t hw_enc_features;
> netdev_features_t mpls_features;
> + netdev_features_t gso_partial_features;
>
> int ifindex;
> int group;
> @@ -4014,6 +4015,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
> BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
> BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
> BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
> + BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
>
> return (features & feature) == feature;
> }
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 15d0df943466..c291a282f8b6 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -482,6 +482,8 @@ enum {
> SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
>
> SKB_GSO_TUNNEL_REMCSUM = 1 << 12,
> +
> + SKB_GSO_PARTIAL = 1 << 13,
> };
>
> #if BITS_PER_LONG > 32
> @@ -3584,7 +3586,10 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
> * Keeps track of level of encapsulation of network headers.
> */
> struct skb_gso_cb {
> - int mac_offset;
> + union {
> + int mac_offset;
> + int data_offset;
> + };
> int encap_level;
> __wsum csum;
> __u16 csum_start;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index edb7179bc051..666cf427898b 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2711,6 +2711,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
> return ERR_PTR(err);
> }
>
> + /* Only report GSO partial support if it will enable us to
> + * support segmentation on this frame without needing additional
> + * work.
> + */
> + if (features & NETIF_F_GSO_PARTIAL) {
> + netdev_features_t partial_features;
> + struct net_device *dev = skb->dev;
> +
> + partial_features = dev->features & dev->gso_partial_features;
> + if (!skb_gso_ok(skb, features | partial_features))
> + features &= ~NETIF_F_GSO_PARTIAL;
> + }
> +
> BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
> sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
>
> @@ -2841,6 +2854,14 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
> if (skb->encapsulation)
> features &= dev->hw_enc_features;
>
> + /* Support for GSO partial features requires software intervention
> + * before we can actually process the packets so we need to strip
> + * support for any partial features now and we can pull them back
> + * in after we have partially segmented the frame.
> + */
> + if (skb_is_gso(skb) && !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
> + features &= ~dev->gso_partial_features;
> +
> if (skb_vlan_tagged(skb))
> features = netdev_intersect_features(features,
> dev->vlan_features |
> @@ -6702,6 +6723,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
> }
> }
>
> + /* GSO partial features require GSO partial be set */
> + if ((features & dev->gso_partial_features) &&
> + !(features & NETIF_F_GSO_PARTIAL)) {
> + netdev_dbg(dev,
> + "Dropping partially supported GSO features since no GSO partial.\n");
> + features &= ~dev->gso_partial_features;
> + }
> +
> #ifdef CONFIG_NET_RX_BUSY_POLL
> if (dev->netdev_ops->ndo_busy_poll)
> features |= NETIF_F_BUSY_POLL;
> @@ -6982,7 +7011,7 @@ int register_netdevice(struct net_device *dev)
>
> /* Make NETIF_F_SG inheritable to tunnel devices.
> */
> - dev->hw_enc_features |= NETIF_F_SG;
> + dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
>
> /* Make NETIF_F_SG inheritable to MPLS.
> */
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index b3c39d531469..d1b278c6c29f 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -88,6 +88,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
> [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
> [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
> [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-remcsum-segmentation",
> + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
>
> [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
> [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f044f970f1a6..bdcba77e164c 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3076,8 +3076,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> struct sk_buff *frag_skb = head_skb;
> unsigned int offset = doffset;
> unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> + unsigned int partial_segs = 0;
> unsigned int headroom;
> - unsigned int len;
> + unsigned int len = head_skb->len;
> __be16 proto;
> bool csum;
> int sg = !!(features & NETIF_F_SG);
> @@ -3094,6 +3095,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>
> csum = !!can_checksum_protocol(features, proto);
>
> + /* GSO partial only requires that we trim off any excess that
> + * doesn't fit into an MSS sized block, so take care of that
> + * now.
> + */
> + if (features & NETIF_F_GSO_PARTIAL) {
> + partial_segs = len / mss;
> + mss *= partial_segs;
> + }
> +
> headroom = skb_headroom(head_skb);
> pos = skb_headlen(head_skb);
>
> @@ -3281,6 +3291,15 @@ perform_csum_check:
> */
> segs->prev = tail;
>
> + /* Update GSO info on first skb in partial sequence. */
> + if (partial_segs) {
> + skb_shinfo(segs)->gso_size = mss / partial_segs;
> + skb_shinfo(segs)->gso_segs = partial_segs;
> + skb_shinfo(segs)->gso_type = skb_shinfo(head_skb)->gso_type |
> + SKB_GSO_PARTIAL;
> + SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset;
> + }
> +
> /* Following permits correct backpressure, for protocols
> * using skb_set_owner_w().
> * Idea is to transfer ownership from head_skb to last segment.
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 5e3885672907..d091f91fa25d 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1199,7 +1199,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> unsigned int offset = 0;
> bool udpfrag, encap;
> struct iphdr *iph;
> - int proto;
> + int proto, tot_len;
> int nhoff;
> int ihl;
> int id;
> @@ -1269,10 +1269,18 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> if (skb->next)
> iph->frag_off |= htons(IP_MF);
> offset += skb->len - nhoff - ihl;
> + tot_len = skb->len - nhoff;
> + } else if (skb_is_gso(skb)) {
> + iph->id = htons(id);
> + id += skb_shinfo(skb)->gso_segs;
> + tot_len = skb_shinfo(skb)->gso_size +
> + SKB_GSO_CB(skb)->data_offset +
> + skb->head - (unsigned char *)iph;
> } else {
> iph->id = htons(id++);
> + tot_len = skb->len - nhoff;
> }
> - iph->tot_len = htons(skb->len - nhoff);
> + iph->tot_len = htons(tot_len);
> ip_send_check(iph);
> if (encap)
> skb_reset_inner_headers(skb);
> diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
> index 7ea14ced9222..dea0390d65bb 100644
> --- a/net/ipv4/gre_offload.c
> +++ b/net/ipv4/gre_offload.c
> @@ -85,7 +85,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> skb = segs;
> do {
> struct gre_base_hdr *greh;
> - __be32 *pcsum;
> + __sum16 *pcsum;
>
> /* Set up inner headers if we are offloading inner checksum */
> if (skb->ip_summed == CHECKSUM_PARTIAL) {
> @@ -105,10 +105,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> continue;
>
> greh = (struct gre_base_hdr *)skb_transport_header(skb);
> - pcsum = (__be32 *)(greh + 1);
> + pcsum = (__sum16 *)(greh + 1);
> +
> + if (skb_is_gso(skb)) {
> + unsigned int partial_adj;
> +
> + /* Adjust checksum to account for the fact that
> + * the partial checksum is based on actual size
> + * whereas headers should be based on MSS size.
> + */
> + partial_adj = skb->len + skb_headroom(skb) -
> + SKB_GSO_CB(skb)->data_offset -
> + skb_shinfo(skb)->gso_size;
> + *pcsum = ~csum_fold((__force __wsum)htonl(partial_adj));
> + } else {
> + *pcsum = 0;
> + }
>
> - *pcsum = 0;
> - *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
> + *(pcsum + 1) = 0;
> + *pcsum = gso_make_checksum(skb, 0);
> } while ((skb = skb->next));
> out:
> return segs;
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index 1a2e9957c177..4e9b8f011473 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -107,6 +107,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
> goto out;
> }
>
> + /* GSO partial only requires splitting the frame into an MSS
> + * multiple and possibly a remainder. So update the mss now.
> + */
> + if (features & NETIF_F_GSO_PARTIAL)
> + mss = skb->len - (skb->len % mss);
> +
> copy_destructor = gso_skb->destructor == tcp_wfree;
> ooo_okay = gso_skb->ooo_okay;
> /* All segments but the first should have ooo_okay cleared */
> @@ -131,7 +137,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
> newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
> (__force u32)delta));
>
> - do {
> + while (skb->next) {
> th->fin = th->psh = 0;
> th->check = newcheck;
>
> @@ -151,7 +157,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
>
> th->seq = htonl(seq);
> th->cwr = 0;
> - } while (skb->next);
> + }
>
> /* Following permits TCP Small Queues to work well with GSO :
> * The callback to TCP stack will be called at the time last frag
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 8a3405a80260..5fcb93269afb 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -100,7 +100,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
> udp_offset = outer_hlen - tnl_hlen;
> skb = segs;
> do {
> - __be16 len;
> + unsigned int len;
>
> if (remcsum)
> skb->ip_summed = CHECKSUM_NONE;
> @@ -118,14 +118,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
> skb_reset_mac_header(skb);
> skb_set_network_header(skb, mac_len);
> skb_set_transport_header(skb, udp_offset);
> - len = htons(skb->len - udp_offset);
> + len = skb->len - udp_offset;
> uh = udp_hdr(skb);
> - uh->len = len;
> +
> + /* If we are only performing partial GSO the inner header
> + * will be using a length value equal to only one MSS sized
> + * segment instead of the entire frame.
> + */
> + if (skb_is_gso(skb)) {
> + uh->len = htons(skb_shinfo(skb)->gso_size +
> + SKB_GSO_CB(skb)->data_offset +
> + skb->head - (unsigned char *)uh);
> + } else {
> + uh->len = htons(len);
> + }
>
> if (!need_csum)
> continue;
>
> - uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));
> + uh->check = ~csum_fold(csum_add(partial,
> + (__force __wsum)htonl(len)));
>
> if (skb->encapsulation || !offload_csum) {
> uh->check = gso_make_checksum(skb, ~uh->check);
> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> index eeca943f12dc..d467053c226a 100644
> --- a/net/ipv6/ip6_offload.c
> +++ b/net/ipv6/ip6_offload.c
> @@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
> int proto;
> struct frag_hdr *fptr;
> unsigned int unfrag_ip6hlen;
> + unsigned int payload_len;
> u8 *prevhdr;
> int offset = 0;
> bool encap, udpfrag;
> @@ -117,7 +118,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
>
> for (skb = segs; skb; skb = skb->next) {
> ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
> - ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
> + if (skb_is_gso(skb))
> + payload_len = skb_shinfo(skb)->gso_size +
> + SKB_GSO_CB(skb)->data_offset +
> + skb->head - (unsigned char *)(ipv6h + 1);
> + else
> + payload_len = skb->len - nhoff - sizeof(*ipv6h);
> + ipv6h->payload_len = htons(payload_len);
> skb->network_header = (u8 *)ipv6h - skb->head;
>
> if (udpfrag) {
>
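One more note for reviewers following the length rewrites above: every
header that carries a length (IPv4 tot_len, UDP len, IPv6 payload_len)
is derived from gso_size plus the distance from that header to
data_offset, rather than from skb->len, since skb->len still covers the
whole super-frame. Schematically, with made-up offsets (illustrative
only, not from the patch):

    #include <stdio.h>

    /* data_offset marks where the hardware-segmented region begins,
     * measured from the start of the buffer, as the fixed headers
     * are replicated verbatim into each segment.
     */
    int main(void)
    {
            unsigned int gso_size    = 1448; /* payload per segment */
            unsigned int data_offset = 98;   /* end of fixed headers */
            unsigned int nhoff = 16, uhoff = 36; /* IP / UDP offsets */

            printf("iph->tot_len = %u\n", gso_size + data_offset - nhoff);
            printf("uh->len      = %u\n", gso_size + data_offset - uhoff);
            return 0;
    }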