Message-ID: <20260116072945.125661-1-tao03.wang@horizon.auto>
Date: Fri, 16 Jan 2026 15:29:45 +0800
From: Tao Wang <tao03.wang@...izon.auto>
To: <linux@...linux.org.uk>
CC: <alexandre.torgue@...s.st.com>, <andrew+netdev@...n.ch>,
	<davem@...emloft.net>, <edumazet@...gle.com>, <horms@...nel.org>,
	<kuba@...nel.org>, <linux-doc@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, <maxime.chevallier@...tlin.com>,
	<mcoquelin.stm32@...il.com>, <netdev@...r.kernel.org>, <pabeni@...hat.com>,
	<tao03.wang@...izon.auto>
Subject: Re: Re: [PATCH net v2] net: stmmac: fix transmit queue timed out after resume

> > > While I agree with the change for stmmac_tso_xmit(), please explain why
> > > the change in stmmac_free_tx_buffer() is necessary.
> > >
> > > It seems to me that if this is missing in stmmac_free_tx_buffer(), the
> > > driver should have more problems than just TSO.
> > 
> > The change in stmmac_free_tx_buffer() is intended to be generic for all
> > users of last_segment, not only for the TSO path.
> 
> However, transmit is a hotpath, so work needs to be minimised for good
> performance. We don't want anything that is unnecessary in these paths.
> 
> If we always explicitly set .last_segment when adding any packet to the
> ring, then there is absolutely no need to also do so when freeing them.
> 
> Also, I think there's a similar issue with .is_jumbo.
> 
> So, I think it would make more sense to have some helpers for setting
> up the tx_skbuff_dma entry. Maybe something like the below? I'll see
> if I can measure the performance impact of this later today, but I
> can't guarantee I'll get to that.
> 
> The idea here is to ensure that all members with the exception of
> xsk_meta are fully initialised when an entry is populated.
> 
> I haven't removed anything in the tx_q->tx_skbuff_dma entry release
> path yet, but with this in place, we should be able to eliminate the
> clearance of these in stmmac_tx_clean() and stmmac_free_tx_buffer().
> 
> Note that the driver assumes setting .buf to zero means the entry is
> cleared. dma_addr_t is a cookie which is device specific, and zero
> may be a valid DMA cookie. Only DMA_MAPPING_ERROR is invalid, and
> can be assumed to hold any meaning in driver code. So that needs
> fixing as well.
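
To illustrate the sentinel point above: a minimal sketch, assuming the
existing tx_skbuff_dma layout, of what treating DMA_MAPPING_ERROR (rather
than zero) as the "entry is cleared" marker could look like. The helper
names are hypothetical and this is not part of the quoted diff.

#include <linux/dma-mapping.h>	/* DMA_MAPPING_ERROR */

/* Hypothetical helpers: DMA_MAPPING_ERROR is the only dma_addr_t value
 * guaranteed never to be a valid mapping, so use it - not zero - as the
 * "no buffer mapped" marker for a tx_skbuff_dma entry.
 */
static void stmmac_clear_tx_dma_entry(struct stmmac_tx_queue *tx_q,
				      unsigned int entry)
{
	tx_q->tx_skbuff_dma[entry].buf = DMA_MAPPING_ERROR;
	tx_q->tx_skbuff_dma[entry].len = 0;
	tx_q->tx_skbuff_dma[entry].map_as_page = false;
	tx_q->tx_skbuff_dma[entry].last_segment = false;
	tx_q->tx_skbuff_dma[entry].is_jumbo = false;
}

static bool stmmac_tx_dma_entry_is_mapped(struct stmmac_tx_queue *tx_q,
					  unsigned int entry)
{
	return tx_q->tx_skbuff_dma[entry].buf != DMA_MAPPING_ERROR;
}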
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index a8a78fe7d01f..0e605d0f6a94 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1874,6 +1874,34 @@ static int init_dma_rx_desc_rings(struct net_device *dev,
>  	return ret;
>  }
>  
> +static void stmmac_set_tx_dma_entry(struct stmmac_tx_queue *tx_q,
> +				    unsigned int entry,
> +				    enum stmmac_txbuf_type type,
> +				    dma_addr_t addr, size_t len,
> +				    bool map_as_page)
> +{
> +	tx_q->tx_skbuff_dma[entry].buf = addr;
> +	tx_q->tx_skbuff_dma[entry].len = len;
> +	tx_q->tx_skbuff_dma[entry].buf_type = type;
> +	tx_q->tx_skbuff_dma[entry].map_as_page = map_as_page;
> +	tx_q->tx_skbuff_dma[entry].last_segment = false;
> +	tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> +}
> +
> +static void stmmac_set_tx_skb_dma_entry(struct stmmac_tx_queue *tx_q,
> +					unsigned int entry, dma_addr_t addr,
> +					size_t len, bool map_as_page)
> +{
> +	stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_SKB, addr, len,
> +				map_as_page);
> +}
> +
> +static void stmmac_set_tx_dma_last_segment(struct stmmac_tx_queue *tx_q,
> +					   unsigned int entry)
> +{
> +	tx_q->tx_skbuff_dma[entry].last_segment = true;
> +}
> +
>  /**
>   * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue)
>   * @priv: driver private structure
> @@ -1919,11 +1947,8 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv,
>  			p = tx_q->dma_tx + i;
>  
>  		stmmac_clear_desc(priv, p);
> +		stmmac_set_tx_skb_dma_entry(tx_q, i, 0, 0, false);
>  
> -		tx_q->tx_skbuff_dma[i].buf = 0;
> -		tx_q->tx_skbuff_dma[i].map_as_page = false;
> -		tx_q->tx_skbuff_dma[i].len = 0;
> -		tx_q->tx_skbuff_dma[i].last_segment = false;
>  		tx_q->tx_skbuff[i] = NULL;
>  	}
>  
> @@ -2649,19 +2674,15 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
>  		meta = xsk_buff_get_metadata(pool, xdp_desc.addr);
>  		xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len);
>  
> -		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX;
> -
>  		/* To return XDP buffer to XSK pool, we simple call
>  		 * xsk_tx_completed(), so we don't need to fill up
>  		 * 'buf' and 'xdpf'.
>  		 */
> -		tx_q->tx_skbuff_dma[entry].buf = 0;
> -		tx_q->xdpf[entry] = NULL;
> +		stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_XSK_TX,
> +					0, xdp_desc.len, false);
> +		stmmac_set_tx_dma_last_segment(tx_q, entry);
>  
> -		tx_q->tx_skbuff_dma[entry].map_as_page = false;
> -		tx_q->tx_skbuff_dma[entry].len = xdp_desc.len;
> -		tx_q->tx_skbuff_dma[entry].last_segment = true;
> -		tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> +		tx_q->xdpf[entry] = NULL;
>  
>  		stmmac_set_desc_addr(priv, tx_desc, dma_addr);
>  
> @@ -2836,6 +2857,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
>  			tx_q->tx_skbuff_dma[entry].map_as_page = false;
>  		}
>  
> +		/* This looks at tx_q->tx_skbuff_dma[tx_q->dirty_tx].is_jumbo
> +		 * and tx_q->tx_skbuff_dma[tx_q->dirty_tx].last_segment
> +		 */
>  		stmmac_clean_desc3(priv, tx_q, p);
>  
>  		tx_q->tx_skbuff_dma[entry].last_segment = false;
> @@ -4494,10 +4518,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  	 * this DMA buffer right after the DMA engine completely finishes the
>  	 * full buffer transmission.
>  	 */
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_headlen(skb);
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = false;
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
> +	stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, skb_headlen(skb),
> +				    false);
>  
>  	/* Prepare fragments */
>  	for (i = 0; i < nfrags; i++) {
> @@ -4512,17 +4534,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>  		stmmac_tso_allocator(priv, des, skb_frag_size(frag),
>  				     (i == nfrags - 1), queue);
>  
> -		tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
> -		tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
> -		tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
> -		tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
> +		stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des,
> +					    skb_frag_size(frag), true);
>  	}
>  
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
> +	stmmac_set_tx_dma_last_segment(tx_q, tx_q->cur_tx);
>  
>  	/* Only the last descriptor gets to point to the skb. */
>  	tx_q->tx_skbuff[tx_q->cur_tx] = skb;
> -	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
>  
>  	/* Manage tx mitigation */
>  	tx_packets = (tx_q->cur_tx + 1) - first_tx;
> @@ -4774,23 +4793,18 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  		if (dma_mapping_error(priv->device, des))
>  			goto dma_map_err; /* should reuse desc w/o issues */
>  
> -		tx_q->tx_skbuff_dma[entry].buf = des;
> -
> +		stmmac_set_tx_skb_dma_entry(tx_q, entry, des, len, true);
>  		stmmac_set_desc_addr(priv, desc, des);
>  
> -		tx_q->tx_skbuff_dma[entry].map_as_page = true;
> -		tx_q->tx_skbuff_dma[entry].len = len;
> -		tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
> -		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
> -
>  		/* Prepare the descriptor and set the own bit too */
>  		stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
>  				priv->mode, 1, last_segment, skb->len);
>  	}
>  
> +	stmmac_set_tx_dma_last_segment(tx_q, entry);
> +
>  	/* Only the last descriptor gets to point to the skb. */
>  	tx_q->tx_skbuff[entry] = skb;
> -	tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
>  
>  	/* According to the coalesce parameter the IC bit for the latest
>  	 * segment is reset and the timer re-started to clean the tx status.
> @@ -4869,14 +4883,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>  		if (dma_mapping_error(priv->device, des))
>  			goto dma_map_err;
>  
> -		tx_q->tx_skbuff_dma[first_entry].buf = des;
> -		tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
> -		tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
> +		stmmac_set_tx_skb_dma_entry(tx_q, first_entry, des, nopaged_len,
> +					    false);
>  
>  		stmmac_set_desc_addr(priv, first, des);
>  
> -		tx_q->tx_skbuff_dma[first_entry].len = nopaged_len;
> -		tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment;
> +		if (last_segment)
> +			stmmac_set_tx_dma_last_segment(tx_q, first_entry);
>  
>  		if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
>  			     priv->hwts_tx_en)) {
> @@ -5064,6 +5077,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
>  	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
>  	bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported;
>  	unsigned int entry = tx_q->cur_tx;
> +	enum stmmac_txbuf_type buf_type;
>  	struct dma_desc *tx_desc;
>  	dma_addr_t dma_addr;
>  	bool set_ic;
> @@ -5091,7 +5105,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
>  		if (dma_mapping_error(priv->device, dma_addr))
>  			return STMMAC_XDP_CONSUMED;
>  
> -		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO;
> +		buf_type = STMMAC_TXBUF_T_XDP_NDO;
>  	} else {
>  		struct page *page = virt_to_page(xdpf->data);
>  
> @@ -5100,14 +5114,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
>  		dma_sync_single_for_device(priv->device, dma_addr,
>  					   xdpf->len, DMA_BIDIRECTIONAL);
>  
> -		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
> +		buf_type = STMMAC_TXBUF_T_XDP_TX;
>  	}
>  
> -	tx_q->tx_skbuff_dma[entry].buf = dma_addr;
> -	tx_q->tx_skbuff_dma[entry].map_as_page = false;
> -	tx_q->tx_skbuff_dma[entry].len = xdpf->len;
> -	tx_q->tx_skbuff_dma[entry].last_segment = true;
> -	tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> +	stmmac_set_tx_dma_entry(tx_q, entry, buf_type, dma_addr, xdpf->len,
> +				false);
> +	stmmac_set_tx_dma_last_segment(tx_q, entry);
>  
>  	tx_q->xdpf[entry] = xdpf;

Since the changes are relatively large, I suggest splitting them into a
separate optimization patch. As I cannot validate the is_jumbo scenario,
I have dropped the changes to stmmac_free_tx_buffer(). I will submit a
separate patch focusing only on fixing the TSO case.
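
For reference, the TSO-only portion presumably amounts to explicitly
initialising .last_segment (alongside the other fields) whenever
stmmac_tso_xmit() populates a ring entry, so a stale value left in the
slot cannot be reused after resume. A rough sketch of that shape, not
the actual follow-up patch:

	/* Header segment of a TSO frame: initialise every field that
	 * stmmac_tx_clean() later looks at, including last_segment,
	 * instead of relying on the slot having been cleared earlier.
	 */
	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
	tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_headlen(skb);
	tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = false;
	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
	tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = false;

The fragment loop would get the same treatment, with map_as_page set to
true, before the final entry's last_segment is set to true.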
