[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZN0AjcCdPx35cu+q@moon.secunet.de>
Date: Wed, 16 Aug 2023 18:59:57 +0200
From: Antony Antony <antony.antony@...unet.com>
To: Eyal Birger <eyal.birger@...il.com>
CC: <antony.antony@...unet.com>, Steffen Klassert
<steffen.klassert@...unet.com>, Herbert Xu <herbert@...dor.apana.org.au>,
<devel@...ux-ipsec.org>, <netdev@...r.kernel.org>
Subject: Re: [PATCH v4 ipsec-next 2/3] xfrm: Support GRO for IPv4 ESP in UDP
encapsulation.
Hi Eyal,
Thanks for your quick review. I have addressed the points you raised on
v4 and sent v5 of the patches.
On Wed, Aug 16, 2023 at 14:15:01 +0300, Eyal Birger wrote:
> Hi Antony,
>
> On Wed, Aug 16, 2023 at 12:57 PM Antony Antony
> <antony.antony@...unet.com> wrote:
> >
> > From: Steffen Klassert <steffen.klassert@...unet.com>
> >
> > This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> > packets. Decapsulation happens at L2 and saves a full round through
> > the stack for each packet. This is also needed to support HW offload
> > for ESP in UDP encapsulation.
> >
> > Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
> > Co-developed-by: Antony Antony <antony.antony@...unet.com>
> > Signed-off-by: Antony Antony <antony.antony@...unet.com>
> > ---
> > include/net/gro.h | 2 +-
> > include/net/xfrm.h | 4 ++
> > net/ipv4/esp4_offload.c | 6 ++-
> > net/ipv4/udp.c | 16 ++++++-
> > net/ipv4/xfrm4_input.c | 98 ++++++++++++++++++++++++++++++++---------
> > 5 files changed, 103 insertions(+), 23 deletions(-)
> >
> > diff --git a/include/net/gro.h b/include/net/gro.h
> > index a4fab706240d..41c12c5d1ea1 100644
> > --- a/include/net/gro.h
> > +++ b/include/net/gro.h
> > @@ -29,7 +29,7 @@ struct napi_gro_cb {
> > /* Number of segments aggregated. */
> > u16 count;
> >
> > - /* Used in ipv6_gro_receive() and foo-over-udp */
> > + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
> > u16 proto;
> >
> > /* jiffies when first packet was created/queued */
> > diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> > index 33ee3f5936e6..e980f442ddcd 100644
> > --- a/include/net/xfrm.h
> > +++ b/include/net/xfrm.h
> > @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
> > int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
> > int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
> > int encap_type);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > + struct sk_buff *skb);
>
> Why does this function need to be declared twice in this file?
No need. Actually, the following patch removed it :) It is fixed in v5.
>
> > int xfrm4_transport_finish(struct sk_buff *skb, int async);
> > int xfrm4_rcv(struct sk_buff *skb);
> >
> > @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
> > void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
> > int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> > int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > + struct sk_buff *skb);
> > int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
> > int optlen);
> > #else
> > diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> > index 77bb01032667..34ebfdf0e986 100644
> > --- a/net/ipv4/esp4_offload.c
> > +++ b/net/ipv4/esp4_offload.c
> > @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> > int offset = skb_gro_offset(skb);
> > struct xfrm_offload *xo;
> > struct xfrm_state *x;
> > + int encap_type = 0;
> > __be32 seq;
> > __be32 spi;
> >
> > @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> > xo->flags |= XFRM_GRO;
> >
> > + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
> > + encap_type = UDP_ENCAP_ESPINUDP;
> > +
> > XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
> > XFRM_SPI_SKB_CB(skb)->family = AF_INET;
> > XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> > @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> > /* We don't need to handle errors from xfrm_input, it does all
> > * the error handling and frees the resources on error. */
> > - xfrm_input(skb, IPPROTO_ESP, spi, 0);
> > + xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
> >
> > return ERR_PTR(-EINPROGRESS);
> > out_reset:
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index aa32afd871ee..337607b17ebd 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
> > }
> > }
> >
> > +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
> > + struct udp_sock *up)
> > +{
> > +#ifdef CONFIG_XFRM
> > + if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
> > + if (family == AF_INET)
> > + up->gro_receive = xfrm4_gro_udp_encap_rcv;
> > + }
> > +#endif
> > +}
> > +
> > /*
> > * Socket option code for UDP
> > */
> > @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> > case 0:
> > #ifdef CONFIG_XFRM
> > case UDP_ENCAP_ESPINUDP:
> > + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
> > + fallthrough;
> > case UDP_ENCAP_ESPINUDP_NON_IKE:
> > #if IS_ENABLED(CONFIG_IPV6)
> > if (sk->sk_family == AF_INET6)
> > up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> > - else
> > #endif
> > + if (sk->sk_family == AF_INET)
>
> Why is this change needed?
It is not necessary. I removed it in v5.
>
> > up->encap_rcv = xfrm4_udp_encap_rcv;
> > #endif
> > fallthrough;
> > @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> > udp_tunnel_encap_enable(sk->sk_socket);
> > up->gro_enabled = valbool;
> > up->accept_udp_l4 = valbool;
> > + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
> > release_sock(sk);
> > break;
> >
> > diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> > index ad2afeef4f10..b57f477c745e 100644
> > --- a/net/ipv4/xfrm4_input.c
> > +++ b/net/ipv4/xfrm4_input.c
> > @@ -17,6 +17,8 @@
> > #include <linux/netfilter_ipv4.h>
> > #include <net/ip.h>
> > #include <net/xfrm.h>
> > +#include <net/protocol.h>
> > +#include <net/gro.h>
> >
> > static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
> > struct sk_buff *skb)
> > @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
> > return 0;
> > }
> >
> > -/* If it's a keepalive packet, then just eat it.
> > - * If it's an encapsulated packet, then pass it to the
> > - * IPsec xfrm input.
> > - * Returns 0 if skb passed to xfrm or was dropped.
> > - * Returns >0 if skb should be passed to UDP.
> > - * Returns <0 if skb should be resubmitted (-ret is protocol)
> > - */
> > -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
> > {
> > struct udp_sock *up = udp_sk(sk);
> > struct udphdr *uh;
> > @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > __be32 *udpdata32;
> > __u16 encap_type = up->encap_type;
> >
> > - /* if this is not encapsulated socket, then just return now */
> > - if (!encap_type)
> > + /* if unknown encap_type then just return now */
> > + if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
>
> This change is unclear to me - the patch adds support for GRO on
> UDP_ENCAP_ESPINUDP.
yes.
> How can we now get other encap types here? and why wasn't the old condition ok?
In the current code the old check is enough. I removed the new check in v5.
>
> > return 1;
> >
> > /* If this is a paged skb, make sure we pull up
> > @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > case UDP_ENCAP_ESPINUDP:
> > /* Check if this is a keepalive packet. If so, eat it. */
> > if (len == 1 && udpdata[0] == 0xff) {
> > - goto drop;
> > + return -EINVAL;
> > } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
> > /* ESP Packet without Non-ESP header */
> > len = sizeof(struct udphdr);
> > @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > case UDP_ENCAP_ESPINUDP_NON_IKE:
> > /* Check if this is a keepalive packet. If so, eat it. */
> > if (len == 1 && udpdata[0] == 0xff) {
> > - goto drop;
> > + return -EINVAL;
> > } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
> > udpdata32[0] == 0 && udpdata32[1] == 0) {
> >
> > @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > * protocol to ESP, and then call into the transform receiver.
> > */
> > if (skb_unclone(skb, GFP_ATOMIC))
> > - goto drop;
> > + return -EINVAL;
> >
> > /* Now we can update and verify the packet length... */
> > iph = ip_hdr(skb);
> > @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > iph->tot_len = htons(ntohs(iph->tot_len) - len);
> > if (skb->len < iphlen + len) {
> > /* packet is too small!?! */
> > - goto drop;
> > + return -EINVAL;
> > }
> >
> > /* pull the data buffer up to the ESP header and set the
> > * transport header to point to ESP. Keep UDP on the stack
> > * for later.
> > */
> > - __skb_pull(skb, len);
> > - skb_reset_transport_header(skb);
> > + if (pull) {
> > + __skb_pull(skb, len);
> > + skb_reset_transport_header(skb);
> > + } else {
> > + skb_set_transport_header(skb, len);
> > + }
> >
> > /* process ESP */
> > - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> > -
> > -drop:
> > - kfree_skb(skb);
> > return 0;
> > }
> >
> > +/* If it's a keepalive packet, then just eat it.
> > + * If it's an encapsulated packet, then pass it to the
> > + * IPsec xfrm input.
> > + * Returns 0 if skb passed to xfrm or was dropped.
> > + * Returns >0 if skb should be passed to UDP.
> > + * Returns <0 if skb should be resubmitted (-ret is protocol)
> > + */
> > +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +{
> > + int ret;
> > +
> > + ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> > + if (!ret)
> > + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> > + udp_sk(sk)->encap_type);
> > +
> > + if (ret < 0) {
> > + kfree_skb(skb);
> > + return 0;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > + struct sk_buff *skb)
> > +{
> > + int offset = skb_gro_offset(skb);
> > + const struct net_offload *ops;
> > + struct sk_buff *pp = NULL;
> > + int ret;
> > +
> > + offset = offset - sizeof(struct udphdr);
> > +
> > + if (!pskb_pull(skb, offset))
> > + return NULL;
> > +
> > + rcu_read_lock();
> > + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> > + if (!ops || !ops->callbacks.gro_receive)
> > + goto out;
> > +
> > + ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> > + if (ret)
> > + goto out;
> > +
> > + skb_push(skb, offset);
> > + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> > +
> > + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> > + rcu_read_unlock();
> > +
> > + return pp;
> > +
> > +out:
> > + rcu_read_unlock();
> > + skb_push(skb, offset);
> > + NAPI_GRO_CB(skb)->same_flow = 0;
> > + NAPI_GRO_CB(skb)->flush = 1;
> > +
> > + return NULL;
> > +}
> > +
> > int xfrm4_rcv(struct sk_buff *skb)
> > {
> > return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> > --
> > 2.30.2
> >
Powered by blists - more mailing lists