[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <DEJKKYXTM4TH.2MK2CNLW7L5D3@bootlin.com>
Date: Thu, 27 Nov 2025 16:07:52 +0100
From: Théo Lebrun <theo.lebrun@...tlin.com>
To: "Paolo Valerio" <pvalerio@...hat.com>, <netdev@...r.kernel.org>
Cc: "Nicolas Ferre" <nicolas.ferre@...rochip.com>, "Claudiu Beznea"
<claudiu.beznea@...on.dev>, "Andrew Lunn" <andrew+netdev@...n.ch>, "David
S. Miller" <davem@...emloft.net>, "Eric Dumazet" <edumazet@...gle.com>,
"Jakub Kicinski" <kuba@...nel.org>, "Paolo Abeni" <pabeni@...hat.com>,
"Lorenzo Bianconi" <lorenzo@...nel.org>
Subject: Re: [PATCH RFC net-next 6/6] cadence: macb/gem: introduce xmit
support
Hello Paolo, netdev,
On Wed Nov 19, 2025 at 2:53 PM CET, Paolo Valerio wrote:
> Add XDP_TX verdict support, also introduce ndo_xdp_xmit function for
> redirection, and update macb_tx_unmap() to handle both skbs and xdp
> frames advertising NETDEV_XDP_ACT_NDO_XMIT capability and the ability
> to process XDP_TX verdicts.
>
> Signed-off-by: Paolo Valerio <pvalerio@...hat.com>
> ---
> drivers/net/ethernet/cadence/macb_main.c | 166 +++++++++++++++++++++--
> 1 file changed, 153 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index eeda1a3871a6..bd62d3febeb1 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -969,6 +969,17 @@ static int macb_halt_tx(struct macb *bp)
> bp, TSR);
> }
>
> +static void release_buff(void *buff, enum macb_tx_buff_type type, int budget)
> +{
> + if (type == MACB_TYPE_SKB) {
> + napi_consume_skb(buff, budget);
> + } else if (type == MACB_TYPE_XDP_TX) {
> + xdp_return_frame_rx_napi(buff);
> + } else {
> + xdp_return_frame(buff);
> + }
> +}
> +
> static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff,
> int budget)
> {
> @@ -983,10 +994,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff,
> }
>
> if (tx_buff->data) {
> - if (tx_buff->type != MACB_TYPE_SKB)
> - netdev_err(bp->dev, "BUG: Unexpected tx buffer type while unmapping (%d)",
> - tx_buff->type);
> - napi_consume_skb(tx_buff->data, budget);
> + release_buff(tx_buff->data, tx_buff->type, budget);
> tx_buff->data = NULL;
> }
> }
> @@ -1076,8 +1084,8 @@ static void macb_tx_error_task(struct work_struct *work)
> tx_buff = macb_tx_buff(queue, tail);
>
> if (tx_buff->type != MACB_TYPE_SKB)
> - netdev_err(bp->dev, "BUG: Unexpected tx buffer type (%d)",
> - tx_buff->type);
> + goto unmap;
> +
> skb = tx_buff->data;
>
> if (ctrl & MACB_BIT(TX_USED)) {
> @@ -1118,6 +1126,7 @@ static void macb_tx_error_task(struct work_struct *work)
> desc->ctrl = ctrl | MACB_BIT(TX_USED);
> }
>
> +unmap:
> macb_tx_unmap(bp, tx_buff, 0);
> }
>
> @@ -1196,6 +1205,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
> spin_lock_irqsave(&queue->tx_ptr_lock, flags);
> head = queue->tx_head;
> for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
> + void *data = NULL;
> struct macb_tx_buff *tx_buff;
> struct sk_buff *skb;
> struct macb_dma_desc *desc;
> @@ -1218,11 +1228,16 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
> for (;; tail++) {
> tx_buff = macb_tx_buff(queue, tail);
>
> - if (tx_buff->type == MACB_TYPE_SKB)
> - skb = tx_buff->data;
> + if (tx_buff->type != MACB_TYPE_SKB) {
> + data = tx_buff->data;
> + goto unmap;
> + }
>
> /* First, update TX stats if needed */
> - if (skb) {
> + if (tx_buff->type == MACB_TYPE_SKB && tx_buff->data) {
> + data = tx_buff->data;
> + skb = tx_buff->data;
> +
> if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
> !ptp_one_step_sync(skb))
> gem_ptp_do_txstamp(bp, skb, desc);
> @@ -1238,6 +1253,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
> bytes += skb->len;
> }
>
> +unmap:
> /* Now we can safely release resources */
> macb_tx_unmap(bp, tx_buff, budget);
>
> @@ -1245,7 +1261,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
> * WARNING: at this point skb has been freed by
> * macb_tx_unmap().
> */
> - if (skb)
> + if (data)
> break;
> }
> }
> @@ -1357,8 +1373,124 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
> */
> }
>
> +static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf,
> + struct net_device *dev, dma_addr_t addr)
> +{
> + enum macb_tx_buff_type buff_type;
> + struct macb_tx_buff *tx_buff;
> + int cpu = smp_processor_id();
> + struct macb_dma_desc *desc;
> + struct macb_queue *queue;
> + unsigned long flags;
> + dma_addr_t mapping;
> + u16 queue_index;
> + int err = 0;
> + u32 ctrl;
> +
> + queue_index = cpu % bp->num_queues;
> + queue = &bp->queues[queue_index];
> + buff_type = !addr ? MACB_TYPE_XDP_NDO : MACB_TYPE_XDP_TX;
I am not the biggest fan of piggy-backing on !!addr to know which
codepath called us. If the macb_xdp_submit_frame() call in gem_xdp_run()
ever gives an addr=0 coming from macb_get_addr(bp, desc), then we will
be submitting NDO typed frames and creating additional DMA mappings
which would be a really hard to debug bug.
> + spin_lock_irqsave(&queue->tx_ptr_lock, flags);
> +
> + /* This is a hard error, log it. */
> + if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
> + bp->tx_ring_size) < 1) {
Hard wrapped line is not required, it fits in one line.
> + netif_stop_subqueue(dev, queue_index);
> + netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
> + queue->tx_head, queue->tx_tail);
> + err = -ENOMEM;
> + goto unlock;
> + }
> +
> + if (!addr) {
> + mapping = dma_map_single(&bp->pdev->dev,
> + xdpf->data,
> + xdpf->len, DMA_TO_DEVICE);
> + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
> + err = -ENOMEM;
> + goto unlock;
> + }
> + } else {
> + mapping = addr;
> + dma_sync_single_for_device(&bp->pdev->dev, mapping,
> + xdpf->len, DMA_BIDIRECTIONAL);
> + }
> +
> + unsigned int tx_head = queue->tx_head + 1;
Middle scope variable definition. Weirdly named as it isn't storing the
current head offset but the future head offset.
> + ctrl = MACB_BIT(TX_USED);
> + desc = macb_tx_desc(queue, tx_head);
> + desc->ctrl = ctrl;
> +
> + desc = macb_tx_desc(queue, queue->tx_head);
> + tx_buff = macb_tx_buff(queue, queue->tx_head);
> + tx_buff->data = xdpf;
> + tx_buff->type = buff_type;
> + tx_buff->mapping = mapping;
> + tx_buff->size = xdpf->len;
> + tx_buff->mapped_as_page = false;
> +
> + ctrl = (u32)tx_buff->size;
> + ctrl |= MACB_BIT(TX_LAST);
> +
> + if (unlikely(macb_tx_ring_wrap(bp, queue->tx_head) == (bp->tx_ring_size - 1)))
> + ctrl |= MACB_BIT(TX_WRAP);
> +
> + /* Set TX buffer descriptor */
> + macb_set_addr(bp, desc, tx_buff->mapping);
> + /* desc->addr must be visible to hardware before clearing
> + * 'TX_USED' bit in desc->ctrl.
> + */
> + wmb();
> + desc->ctrl = ctrl;
> + queue->tx_head = tx_head;
> +
> + /* Make newly initialized descriptor visible to hardware */
> + wmb();
> +
> + spin_lock(&bp->lock);
> + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
> + spin_unlock(&bp->lock);
> +
> + if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
> + netif_stop_subqueue(dev, queue_index);
The above 30~40 lines are super similar to macb_start_xmit() &
macb_tx_map(). They implement almost the same logic; can we avoid the
duplication?
> +
> +unlock:
> + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
> +
> + if (err)
> + release_buff(xdpf, buff_type, 0);
> +
> + return err;
> +}
> +
> +static int
> +macb_xdp_xmit(struct net_device *dev, int num_frame,
> + struct xdp_frame **frames, u32 flags)
> +{
> + struct macb *bp = netdev_priv(dev);
> + u32 xmitted = 0;
> + int i;
> +
> + if (!macb_is_gem(bp))
> + return -EOPNOTSUPP;
> +
> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> + return -EINVAL;
> +
> + for (i = 0; i < num_frame; i++) {
> + if (macb_xdp_submit_frame(bp, frames[i], dev, 0))
> + break;
> +
> + xmitted++;
> + }
> +
> + return xmitted;
> +}
> +
> static u32 gem_xdp_run(struct macb_queue *queue, struct xdp_buff *xdp,
> - struct net_device *dev)
> + struct net_device *dev, dma_addr_t addr)
> {
> struct bpf_prog *prog;
> u32 act = XDP_PASS;
> @@ -1379,6 +1511,12 @@ static u32 gem_xdp_run(struct macb_queue *queue, struct xdp_buff *xdp,
> break;
> }
> goto out;
> + case XDP_TX:
> + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +
> + if (!xdpf || macb_xdp_submit_frame(queue->bp, xdpf, dev, addr))
> + act = XDP_DROP;
> + goto out;
> default:
> bpf_warn_invalid_xdp_action(dev, prog, act);
> fallthrough;
> @@ -1467,7 +1605,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
> false);
> xdp_buff_clear_frags_flag(&xdp);
>
> - ret = gem_xdp_run(queue, &xdp, bp->dev);
> + ret = gem_xdp_run(queue, &xdp, bp->dev, addr);
> if (ret == XDP_REDIRECT)
> xdp_flush = true;
>
> @@ -4546,6 +4684,7 @@ static const struct net_device_ops macb_netdev_ops = {
> .ndo_hwtstamp_get = macb_hwtstamp_get,
> .ndo_setup_tc = macb_setup_tc,
> .ndo_bpf = macb_xdp,
> + .ndo_xdp_xmit = macb_xdp_xmit,
I'd expect macb_xdp_xmit() to be called gem_xdp_xmit() as well.
Thanks,
--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Powered by blists - more mailing lists