lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 02 Oct 2022 16:13:51 +0200
From:   Paolo Abeni <pabeni@...hat.com>
To:     Eric Dumazet <eric.dumazet@...il.com>,
        "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>
Cc:     netdev <netdev@...r.kernel.org>,
        Eric Dumazet <edumazet@...gle.com>,
        Coco Li <lixiaoyan@...gle.com>
Subject: Re: [PATCH v2 net-next] gro: add support of (hw)gro packets to gro
 stack

On Fri, 2022-09-30 at 15:09 -0700, Eric Dumazet wrote:
> From: Coco Li <lixiaoyan@...gle.com>
> 
> Current GRO stack only supports incoming packets containing
> one frame/MSS.
> 
> This patch changes GRO to accept packets that are already GRO.
> 
> HW-GRO (aka RSC for some vendors) is very often limited in the presence
> of interleaved packets. The Linux SW GRO stack can complete the job
> and provide larger GRO packets, thus reducing the rate of ACK packets
> and CPU overhead.
> 
> This also means BIG TCP can still be used, even if HW-GRO/RSC was
> able to cook ~64 KB GRO packets.
> 
> v2: fix logic in tcp_gro_receive()
> 
>     Only support TCP for the moment (Paolo)
> 
> Co-Developed-by: Eric Dumazet <edumazet@...gle.com>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Signed-off-by: Coco Li <lixiaoyan@...gle.com>
> ---
>  net/core/gro.c         | 18 ++++++++++++++----
>  net/ipv4/tcp_offload.c | 17 +++++++++++++++--
>  2 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/net/core/gro.c b/net/core/gro.c
> index b4190eb084672fb4f2be8b437eccb4e8507ff63f..bc9451743307bc380cca96ae6995aa0a3b83d185 100644
> --- a/net/core/gro.c
> +++ b/net/core/gro.c
> @@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  	unsigned int gro_max_size;
>  	unsigned int new_truesize;
>  	struct sk_buff *lp;
> +	int segs;
>  
>  	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
>  	gro_max_size = READ_ONCE(p->dev->gro_max_size);
> @@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  			return -E2BIG;
>  	}
>  
> +	segs = NAPI_GRO_CB(skb)->count;
>  	lp = NAPI_GRO_CB(p)->last;
>  	pinfo = skb_shinfo(lp);
>  
> @@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  	lp = p;
>  
>  done:
> -	NAPI_GRO_CB(p)->count++;
> +	NAPI_GRO_CB(p)->count += segs;
>  	p->data_len += len;
>  	p->truesize += delta_truesize;
>  	p->len += len;
> @@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  		BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
>  					 sizeof(u32))); /* Avoid slow unaligned acc */
>  		*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
> -		NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
> +		NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
>  		NAPI_GRO_CB(skb)->is_atomic = 1;
> +		NAPI_GRO_CB(skb)->count = 1;
> +		if (unlikely(skb_is_gso(skb))) {
> +			NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
> +			/* Only support TCP at the moment. */
> +			if (!skb_is_gso_tcp(skb))
> +				NAPI_GRO_CB(skb)->flush = 1;
> +		}
>  
>  		/* Setup for GRO checksum validation */
>  		switch (skb->ip_summed) {
> @@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	else
>  		gro_list->count++;
>  
> -	NAPI_GRO_CB(skb)->count = 1;
>  	NAPI_GRO_CB(skb)->age = jiffies;
>  	NAPI_GRO_CB(skb)->last = skb;
> -	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
> +	if (!skb_is_gso(skb))
> +		skb_shinfo(skb)->gso_size = skb_gro_len(skb);
>  	list_add(&skb->list, &gro_list->list);
>  	ret = GRO_HELD;
>  
> @@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
>  
>  	skb->encapsulation = 0;
>  	skb_shinfo(skb)->gso_type = 0;
> +	skb_shinfo(skb)->gso_size = 0;
>  	if (unlikely(skb->slow_gro)) {
>  		skb_orphan(skb);
>  		skb_ext_reset(skb);
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index a844a0d38482d916251f3aca4555c75c9770820c..45dda788938704c3f762256266d9ea29b6ded4a5 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  
>  	mss = skb_shinfo(p)->gso_size;
>  
> -	flush |= (len - 1) >= mss;
> +	/* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
> +	 * If it is a single frame, do not aggregate it if its length
> +	 * is bigger than our mss.
> +	 */
> +	if (unlikely(skb_is_gso(skb)))
> +		flush |= (mss != skb_shinfo(skb)->gso_size);
> +	else
> +		flush |= (len - 1) >= mss;
> +
>  	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>  #ifdef CONFIG_TLS_DEVICE
>  	flush |= p->decrypted ^ skb->decrypted;
> @@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
>  
>  out_check_final:
> -	flush = len < mss;
> +	/* Force a flush if last segment is smaller than mss. */
> +	if (unlikely(skb_is_gso(skb)))
> +		flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
> +	else
> +		flush = len < mss;
> +
>  	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
>  					TCP_FLAG_RST | TCP_FLAG_SYN |
>  					TCP_FLAG_FIN));

LGTM, thanks!

Acked-by: Paolo Abeni <pabeni@...hat.com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux - Powered by OpenVZ