Message-ID: <20260116072945.125661-1-tao03.wang@horizon.auto>
Date: Fri, 16 Jan 2026 15:29:45 +0800
From: Tao Wang <tao03.wang@...izon.auto>
To: <linux@...linux.org.uk>
CC: <alexandre.torgue@...s.st.com>, <andrew+netdev@...n.ch>,
<davem@...emloft.net>, <edumazet@...gle.com>, <horms@...nel.org>,
<kuba@...nel.org>, <linux-doc@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <maxime.chevallier@...tlin.com>,
<mcoquelin.stm32@...il.com>, <netdev@...r.kernel.org>, <pabeni@...hat.com>,
<tao03.wang@...izon.auto>
Subject: Re: Re: [PATCH net v2] net: stmmac: fix transmit queue timed out after resume
> > > While I agree with the change for stmmac_tso_xmit(), please explain why
> > > the change in stmmac_free_tx_buffer() is necessary.
> > >
> > > It seems to me that if this is missing in stmmac_free_tx_buffer(), the
> > > driver should have more problems than just TSO.
> >
> > The change in stmmac_free_tx_buffer() is intended to be generic for all
> > users of last_segment, not only for the TSO path.
>
> However, transmit is a hotpath, so work needs to be minimised for good
> performance. We don't want anything that is unnecessary in these paths.
>
> If we always explicitly set .last_segment when adding any packet to the
> ring, then there is absolutely no need to also do so when freeing them.
>
> Also, I think there's a similar issue with .is_jumbo.
>
> So, I think it would make more sense to have some helpers for setting
> up the tx_skbuff_dma entry. Maybe something like the below? I'll see
> if I can measure the performance impact of this later today, but I
> can't guarantee I'll get to that.
>
> The idea here is to ensure that all members with the exception of
> xsk_meta are fully initialised when an entry is populated.
>
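(For reference, the entry being populated here is struct stmmac_tx_info
from stmmac.h. A rough sketch from memory; the exact types and field
order may differ from the tree this diff applies to:

	enum stmmac_txbuf_type {
		STMMAC_TXBUF_T_SKB,
		STMMAC_TXBUF_T_XDP_TX,
		STMMAC_TXBUF_T_XDP_NDO,
		STMMAC_TXBUF_T_XSK_TX,
	};

	struct stmmac_tx_info {
		dma_addr_t buf;			/* DMA cookie */
		bool map_as_page;		/* mapped as page vs. single */
		unsigned len;
		bool last_segment;
		bool is_jumbo;
		enum stmmac_txbuf_type buf_type;
		struct xsk_tx_metadata_compl xsk_meta;
	};

so the helpers below initialise every member except xsk_meta.)
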
> I haven't removed anything in the tx_q->tx_skbuff_dma entry release
> path yet, but with this in place, we should be able to eliminate the
> clearance of these in stmmac_tx_clean() and stmmac_free_tx_buffer().
>
> Note that the driver assumes setting .buf to zero means the entry is
> cleared. dma_addr_t is a cookie which is device specific, and zero
> may be a valid DMA cookie. Only DMA_MAPPING_ERROR is invalid, and is
> the only value that can be assumed to hold any meaning in driver code.
> So that needs fixing as well.
>
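(Not part of the diff below, but a rough sketch of the direction that
last point suggests; these helper names are hypothetical:

	#include <linux/dma-mapping.h>

	/* Mark an entry as holding no mapping.  DMA_MAPPING_ERROR is the
	 * only dma_addr_t value with a reserved meaning; 0 may be a valid
	 * cookie on some platforms, so it cannot mean "cleared".
	 */
	static void stmmac_clear_tx_dma_entry(struct stmmac_tx_queue *tx_q,
					      unsigned int entry)
	{
		tx_q->tx_skbuff_dma[entry].buf = DMA_MAPPING_ERROR;
		tx_q->tx_skbuff_dma[entry].len = 0;
	}

	static bool stmmac_tx_dma_entry_is_mapped(struct stmmac_tx_queue *tx_q,
						  unsigned int entry)
	{
		return tx_q->tx_skbuff_dma[entry].buf != DMA_MAPPING_ERROR;
	}

Call sites that currently test .buf against zero would then test against
DMA_MAPPING_ERROR instead.)
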
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index a8a78fe7d01f..0e605d0f6a94 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1874,6 +1874,34 @@ static int init_dma_rx_desc_rings(struct net_device *dev,
> return ret;
> }
>
> +static void stmmac_set_tx_dma_entry(struct stmmac_tx_queue *tx_q,
> + unsigned int entry,
> + enum stmmac_txbuf_type type,
> + dma_addr_t addr, size_t len,
> + bool map_as_page)
> +{
> + tx_q->tx_skbuff_dma[entry].buf = addr;
> + tx_q->tx_skbuff_dma[entry].len = len;
> + tx_q->tx_skbuff_dma[entry].buf_type = type;
> + tx_q->tx_skbuff_dma[entry].map_as_page = map_as_page;
> + tx_q->tx_skbuff_dma[entry].last_segment = false;
> + tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> +}
> +
> +static void stmmac_set_tx_skb_dma_entry(struct stmmac_tx_queue *tx_q,
> + unsigned int entry, dma_addr_t addr,
> + size_t len, bool map_as_page)
> +{
> + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_SKB, addr, len,
> + map_as_page);
> +}
> +
> +static void stmmac_set_tx_dma_last_segment(struct stmmac_tx_queue *tx_q,
> + unsigned int entry)
> +{
> + tx_q->tx_skbuff_dma[entry].last_segment = true;
> +}
> +
> /**
> * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue)
> * @priv: driver private structure
> @@ -1919,11 +1947,8 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv,
> p = tx_q->dma_tx + i;
>
> stmmac_clear_desc(priv, p);
> + stmmac_set_tx_skb_dma_entry(tx_q, i, 0, 0, false);
>
> - tx_q->tx_skbuff_dma[i].buf = 0;
> - tx_q->tx_skbuff_dma[i].map_as_page = false;
> - tx_q->tx_skbuff_dma[i].len = 0;
> - tx_q->tx_skbuff_dma[i].last_segment = false;
> tx_q->tx_skbuff[i] = NULL;
> }
>
> @@ -2649,19 +2674,15 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
> meta = xsk_buff_get_metadata(pool, xdp_desc.addr);
> xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len);
>
> - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX;
> -
> /* To return XDP buffer to XSK pool, we simple call
> * xsk_tx_completed(), so we don't need to fill up
> * 'buf' and 'xdpf'.
> */
> - tx_q->tx_skbuff_dma[entry].buf = 0;
> - tx_q->xdpf[entry] = NULL;
> + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_XSK_TX,
> + 0, xdp_desc.len, false);
> + stmmac_set_tx_dma_last_segment(tx_q, entry);
>
> - tx_q->tx_skbuff_dma[entry].map_as_page = false;
> - tx_q->tx_skbuff_dma[entry].len = xdp_desc.len;
> - tx_q->tx_skbuff_dma[entry].last_segment = true;
> - tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> + tx_q->xdpf[entry] = NULL;
>
> stmmac_set_desc_addr(priv, tx_desc, dma_addr);
>
> @@ -2836,6 +2857,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
> tx_q->tx_skbuff_dma[entry].map_as_page = false;
> }
>
> + /* This looks at tx_q->tx_skbuff_dma[tx_q->dirty_tx].is_jumbo
> + * and tx_q->tx_skbuff_dma[tx_q->dirty_tx].last_segment
> + */
> stmmac_clean_desc3(priv, tx_q, p);
>
> tx_q->tx_skbuff_dma[entry].last_segment = false;
> @@ -4494,10 +4518,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
> * this DMA buffer right after the DMA engine completely finishes the
> * full buffer transmission.
> */
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_headlen(skb);
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = false;
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
> + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, skb_headlen(skb),
> + false);
>
> /* Prepare fragments */
> for (i = 0; i < nfrags; i++) {
> @@ -4512,17 +4534,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
> stmmac_tso_allocator(priv, des, skb_frag_size(frag),
> (i == nfrags - 1), queue);
>
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
> + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des,
> + skb_frag_size(frag), true);
> }
>
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
> + stmmac_set_tx_dma_last_segment(tx_q, tx_q->cur_tx);
>
> /* Only the last descriptor gets to point to the skb. */
> tx_q->tx_skbuff[tx_q->cur_tx] = skb;
> - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
>
> /* Manage tx mitigation */
> tx_packets = (tx_q->cur_tx + 1) - first_tx;
> @@ -4774,23 +4793,18 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> if (dma_mapping_error(priv->device, des))
> goto dma_map_err; /* should reuse desc w/o issues */
>
> - tx_q->tx_skbuff_dma[entry].buf = des;
> -
> + stmmac_set_tx_skb_dma_entry(tx_q, entry, des, len, true);
> stmmac_set_desc_addr(priv, desc, des);
>
> - tx_q->tx_skbuff_dma[entry].map_as_page = true;
> - tx_q->tx_skbuff_dma[entry].len = len;
> - tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
> - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
> -
> /* Prepare the descriptor and set the own bit too */
> stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
> priv->mode, 1, last_segment, skb->len);
> }
>
> + stmmac_set_tx_dma_last_segment(tx_q, entry);
> +
> /* Only the last descriptor gets to point to the skb. */
> tx_q->tx_skbuff[entry] = skb;
> - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
>
> /* According to the coalesce parameter the IC bit for the latest
> * segment is reset and the timer re-started to clean the tx status.
> @@ -4869,14 +4883,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> if (dma_mapping_error(priv->device, des))
> goto dma_map_err;
>
> - tx_q->tx_skbuff_dma[first_entry].buf = des;
> - tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
> - tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
> + stmmac_set_tx_skb_dma_entry(tx_q, first_entry, des, nopaged_len,
> + false);
>
> stmmac_set_desc_addr(priv, first, des);
>
> - tx_q->tx_skbuff_dma[first_entry].len = nopaged_len;
> - tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment;
> + if (last_segment)
> + stmmac_set_tx_dma_last_segment(tx_q, first_entry);
>
> if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
> priv->hwts_tx_en)) {
> @@ -5064,6 +5077,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
> struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
> bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported;
> unsigned int entry = tx_q->cur_tx;
> + enum stmmac_txbuf_type buf_type;
> struct dma_desc *tx_desc;
> dma_addr_t dma_addr;
> bool set_ic;
> @@ -5091,7 +5105,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
> if (dma_mapping_error(priv->device, dma_addr))
> return STMMAC_XDP_CONSUMED;
>
> - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO;
> + buf_type = STMMAC_TXBUF_T_XDP_NDO;
> } else {
> struct page *page = virt_to_page(xdpf->data);
>
> @@ -5100,14 +5114,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
> dma_sync_single_for_device(priv->device, dma_addr,
> xdpf->len, DMA_BIDIRECTIONAL);
>
> - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
> + buf_type = STMMAC_TXBUF_T_XDP_TX;
> }
>
> - tx_q->tx_skbuff_dma[entry].buf = dma_addr;
> - tx_q->tx_skbuff_dma[entry].map_as_page = false;
> - tx_q->tx_skbuff_dma[entry].len = xdpf->len;
> - tx_q->tx_skbuff_dma[entry].last_segment = true;
> - tx_q->tx_skbuff_dma[entry].is_jumbo = false;
> + stmmac_set_tx_dma_entry(tx_q, entry, buf_type, dma_addr, xdpf->len,
> + false);
> + stmmac_set_tx_dma_last_segment(tx_q, entry);
>
> tx_q->xdpf[entry] = xdpf;

Since these changes are relatively large, I suggest splitting them out
into a separate optimization patch. As I cannot validate the is_jumbo
scenario, I have dropped the changes to stmmac_free_tx_buffer(). I will
submit a separate patch focusing only on fixing the TSO case.