lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHsH6GuS+QCnk9L1qO9qyBZ05SCgQnvQp8Djf1UR09EpH9tYWQ@mail.gmail.com>
Date:   Fri, 20 Jan 2023 13:04:13 +0200
From:   Eyal Birger <eyal.birger@...il.com>
To:     antony.antony@...unet.com
Cc:     Steffen Klassert <steffen.klassert@...unet.com>,
        Herbert Xu <herbert@...dor.apana.org.au>,
        netdev@...r.kernel.org
Subject: Re: [PATCH 2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.

Hi,

On Thu, Jan 19, 2023 at 10:00 PM Antony Antony
<antony.antony@...unet.com> wrote:
>
> From: Steffen Klassert <steffen.klassert@...unet.com>
>
> This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
> Co-developed-by: Antony Antony <antony.antony@...unet.com>
> Signed-off-by: Antony Antony <antony.antony@...unet.com>
> ---
>  include/net/gro.h       |  2 +-
>  include/net/xfrm.h      |  4 ++
>  net/ipv4/esp4_offload.c | 11 ++++-
>  net/ipv4/udp.c          |  4 +-
>  net/ipv4/xfrm4_input.c  | 99 +++++++++++++++++++++++++++++++++--------
>  5 files changed, 99 insertions(+), 21 deletions(-)
>
> diff --git a/include/net/gro.h b/include/net/gro.h
> index a4fab706240d..41c12c5d1ea1 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -29,7 +29,7 @@ struct napi_gro_cb {
>         /* Number of segments aggregated. */
>         u16     count;
>
> -       /* Used in ipv6_gro_receive() and foo-over-udp */
> +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
>         u16     proto;
>
>         /* jiffies when first packet was created/queued */
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 3e1f70e8e424..74dba98fbf2c 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1666,6 +1666,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
>  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
>  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
>                     int encap_type);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm4_transport_finish(struct sk_buff *skb, int async);
>  int xfrm4_rcv(struct sk_buff *skb);
>
> @@ -1706,6 +1708,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
>  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
>  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
>                      int optlen);
>  #else
> diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> index 77bb01032667..8769bb669fdd 100644
> --- a/net/ipv4/esp4_offload.c
> +++ b/net/ipv4/esp4_offload.c
> @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>         int offset = skb_gro_offset(skb);
>         struct xfrm_offload *xo;
>         struct xfrm_state *x;
> +       int encap_type = 0;
>         __be32 seq;
>         __be32 spi;
>
> @@ -69,6 +70,14 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         xo->flags |= XFRM_GRO;
>
> +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP && skb->sk &&
> +           (udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP ||
> +            udp_sk(skb->sk)->encap_type == UDP_ENCAP_ESPINUDP_NON_IKE)) {
> +               encap_type = udp_sk(skb->sk)->encap_type;
> +               sock_put(skb->sk);
> +               skb->sk = NULL;
> +       }
> +
>         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
>         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
>         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> @@ -76,7 +85,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 9592fe3e444a..6a30d0210c4e 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2729,9 +2729,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>  #if IS_ENABLED(CONFIG_IPV6)
>                         if (sk->sk_family == AF_INET6)
>                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> -                       else
>  #endif
> +                       if (sk->sk_family == AF_INET) {
>                                 up->encap_rcv = xfrm4_udp_encap_rcv;
> +                               up->gro_receive = xfrm4_gro_udp_encap_rcv;
> +                       }
>  #endif
>                         fallthrough;
>                 case UDP_ENCAP_L2TPINUDP:
> diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> index ad2afeef4f10..768d12491a48 100644
> --- a/net/ipv4/xfrm4_input.c
> +++ b/net/ipv4/xfrm4_input.c
> @@ -17,6 +17,8 @@
>  #include <linux/netfilter_ipv4.h>
>  #include <net/ip.h>
>  #include <net/xfrm.h>
> +#include <net/protocol.h>
> +#include <net/gro.h>
>
>  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
>                                    struct sk_buff *skb)
> @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
>         return 0;
>  }
>
> -/* If it's a keepalive packet, then just eat it.
> - * If it's an encapsulated packet, then pass it to the
> - * IPsec xfrm input.
> - * Returns 0 if skb passed to xfrm or was dropped.
> - * Returns >0 if skb should be passed to UDP.
> - * Returns <0 if skb should be resubmitted (-ret is protocol)
> - */
> -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
>  {
>         struct udp_sock *up = udp_sk(sk);
>         struct udphdr *uh;
> @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
>                         /* ESP Packet without Non-ESP header */
>                         len = sizeof(struct udphdr);
> @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP_NON_IKE:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
>                            udpdata32[0] == 0 && udpdata32[1] == 0) {
>
> @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>          * protocol to ESP, and then call into the transform receiver.
>          */
>         if (skb_unclone(skb, GFP_ATOMIC))
> -               goto drop;
> +               return -EINVAL;
>
>         /* Now we can update and verify the packet length... */
>         iph = ip_hdr(skb);
> @@ -147,24 +142,92 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         iph->tot_len = htons(ntohs(iph->tot_len) - len);
>         if (skb->len < iphlen + len) {
>                 /* packet is too small!?! */
> -               goto drop;
> +               return -EINVAL;
>         }
>
>         /* pull the data buffer up to the ESP header and set the
>          * transport header to point to ESP.  Keep UDP on the stack
>          * for later.
>          */
> -       __skb_pull(skb, len);
> -       skb_reset_transport_header(skb);
> +       if (pull) {
> +               __skb_pull(skb, len);
> +               skb_reset_transport_header(skb);
> +       } else {
> +               skb_set_transport_header(skb, len);
> +       }
>
>         /* process ESP */
> -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> -
> -drop:
> -       kfree_skb(skb);
>         return 0;
>  }
>
> +/* If it's a keepalive packet, then just eat it.
> + * If it's an encapsulated packet, then pass it to the
> + * IPsec xfrm input.
> + * Returns 0 if skb passed to xfrm or was dropped.
> + * Returns >0 if skb should be passed to UDP.
> + * Returns <0 if skb should be resubmitted (-ret is protocol)
> + */
> +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +       int ret;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> +       if (!ret)
> +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> +                                      udp_sk(sk)->encap_type);
> +
> +       if (ret < 0) {
> +               kfree_skb(skb);
> +               return 0;
> +       }
> +
> +       return ret;
> +}
> +
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb)
> +{
> +       int offset = skb_gro_offset(skb);
> +       const struct net_offload *ops;
> +       struct sk_buff *pp = NULL;
> +       int ret;
> +
> +       offset = offset - sizeof(struct udphdr);
> +
> +       if (!pskb_pull(skb, offset))
> +               return NULL;
> +
> +       if (!refcount_inc_not_zero(&sk->sk_refcnt))
> +               return NULL;
> +

Isn't an skb_push(skb, offset) needed before returning NULL when
refcount_inc_not_zero() fails above, to undo the earlier pskb_pull()?

> +       rcu_read_lock();
> +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> +       if (!ops || !ops->callbacks.gro_receive)
> +               goto out;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> +       if (ret)
> +               goto out;
> +
> +       skb->sk = sk;

Don't you need something like skb_set_owner_sk_safe() here instead of
assigning skb->sk directly, so that the skb destructor is also set?

> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> +
> +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> +       rcu_read_unlock();
> +
> +       return pp;
> +
> +out:
> +       rcu_read_unlock();
> +       sock_put(sk);
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->same_flow = 0;
> +       NAPI_GRO_CB(skb)->flush = 1;
> +
> +       return NULL;
> +}
> +
>  int xfrm4_rcv(struct sk_buff *skb)
>  {
>         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> --
> 2.30.2
>

Eyal.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ