lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <878qfkzt2h.fsf@redhat.com>
Date: Tue, 02 Dec 2025 18:34:14 +0100
From: Paolo Valerio <pvalerio@...hat.com>
To: Théo Lebrun <theo.lebrun@...tlin.com>,
 netdev@...r.kernel.org
Cc: Nicolas Ferre <nicolas.ferre@...rochip.com>, Claudiu Beznea
 <claudiu.beznea@...on.dev>, Andrew Lunn <andrew+netdev@...n.ch>, "David
 S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub
 Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Lorenzo
 Bianconi <lorenzo@...nel.org>
Subject: Re: [PATCH RFC net-next 6/6] cadence: macb/gem: introduce xmit support

On 27 Nov 2025 at 04:07:52 PM, Théo Lebrun <theo.lebrun@...tlin.com> wrote:

> Hello Paolo, netdev,
>
> On Wed Nov 19, 2025 at 2:53 PM CET, Paolo Valerio wrote:
>> Add XDP_TX verdict support, also introduce ndo_xdp_xmit function for
>> redirection, and update macb_tx_unmap() to handle both skbs and xdp
>> frames advertising NETDEV_XDP_ACT_NDO_XMIT capability and the ability
>> to process XDP_TX verdicts.
>>
>> Signed-off-by: Paolo Valerio <pvalerio@...hat.com>
>> ---
>>  drivers/net/ethernet/cadence/macb_main.c | 166 +++++++++++++++++++++--
>>  1 file changed, 153 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
>> index eeda1a3871a6..bd62d3febeb1 100644
>> --- a/drivers/net/ethernet/cadence/macb_main.c
>> +++ b/drivers/net/ethernet/cadence/macb_main.c
>> @@ -969,6 +969,17 @@ static int macb_halt_tx(struct macb *bp)
>>  					bp, TSR);
>>  }
>>  
>> +static void release_buff(void *buff, enum macb_tx_buff_type type, int budget)
>> +{
>> +	if (type == MACB_TYPE_SKB) {
>> +		napi_consume_skb(buff, budget);
>> +	} else if (type == MACB_TYPE_XDP_TX) {
>> +		xdp_return_frame_rx_napi(buff);
>> +	} else {
>> +		xdp_return_frame(buff);
>> +	}
>> +}
>> +
>>  static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff,
>>  			  int budget)
>>  {
>> @@ -983,10 +994,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_buff *tx_buff,
>>  	}
>>  
>>  	if (tx_buff->data) {
>> -		if (tx_buff->type != MACB_TYPE_SKB)
>> -			netdev_err(bp->dev, "BUG: Unexpected tx buffer type while unmapping (%d)",
>> -				   tx_buff->type);
>> -		napi_consume_skb(tx_buff->data, budget);
>> +		release_buff(tx_buff->data, tx_buff->type, budget);
>>  		tx_buff->data = NULL;
>>  	}
>>  }
>> @@ -1076,8 +1084,8 @@ static void macb_tx_error_task(struct work_struct *work)
>>  		tx_buff = macb_tx_buff(queue, tail);
>>  
>>  		if (tx_buff->type != MACB_TYPE_SKB)
>> -			netdev_err(bp->dev, "BUG: Unexpected tx buffer type (%d)",
>> -				   tx_buff->type);
>> +			goto unmap;
>> +
>>  		skb = tx_buff->data;
>>  
>>  		if (ctrl & MACB_BIT(TX_USED)) {
>> @@ -1118,6 +1126,7 @@ static void macb_tx_error_task(struct work_struct *work)
>>  			desc->ctrl = ctrl | MACB_BIT(TX_USED);
>>  		}
>>  
>> +unmap:
>>  		macb_tx_unmap(bp, tx_buff, 0);
>>  	}
>>  
>> @@ -1196,6 +1205,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
>>  	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
>>  	head = queue->tx_head;
>>  	for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
>> +		void			*data = NULL;
>>  		struct macb_tx_buff	*tx_buff;
>>  		struct sk_buff		*skb;
>>  		struct macb_dma_desc	*desc;
>> @@ -1218,11 +1228,16 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
>>  		for (;; tail++) {
>>  			tx_buff = macb_tx_buff(queue, tail);
>>  
>> -			if (tx_buff->type == MACB_TYPE_SKB)
>> -				skb = tx_buff->data;
>> +			if (tx_buff->type != MACB_TYPE_SKB) {
>> +				data = tx_buff->data;
>> +				goto unmap;
>> +			}
>>  
>>  			/* First, update TX stats if needed */
>> -			if (skb) {
>> +			if (tx_buff->type == MACB_TYPE_SKB && tx_buff->data) {
>> +				data = tx_buff->data;
>> +				skb = tx_buff->data;
>> +
>>  				if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
>>  				    !ptp_one_step_sync(skb))
>>  					gem_ptp_do_txstamp(bp, skb, desc);
>> @@ -1238,6 +1253,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
>>  				bytes += skb->len;
>>  			}
>>  
>> +unmap:
>>  			/* Now we can safely release resources */
>>  			macb_tx_unmap(bp, tx_buff, budget);
>>  
>> @@ -1245,7 +1261,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
>>  			 * WARNING: at this point skb has been freed by
>>  			 * macb_tx_unmap().
>>  			 */
>> -			if (skb)
>> +			if (data)
>>  				break;
>>  		}
>>  	}
>> @@ -1357,8 +1373,124 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
>>  	 */
>>  }
>>  
>> +static int macb_xdp_submit_frame(struct macb *bp, struct xdp_frame *xdpf,
>> +				 struct net_device *dev, dma_addr_t addr)
>> +{
>> +	enum macb_tx_buff_type buff_type;
>> +	struct macb_tx_buff *tx_buff;
>> +	int cpu = smp_processor_id();
>> +	struct macb_dma_desc *desc;
>> +	struct macb_queue *queue;
>> +	unsigned long flags;
>> +	dma_addr_t mapping;
>> +	u16 queue_index;
>> +	int err = 0;
>> +	u32 ctrl;
>> +
>> +	queue_index = cpu % bp->num_queues;
>> +	queue = &bp->queues[queue_index];
>> +	buff_type = !addr ? MACB_TYPE_XDP_NDO : MACB_TYPE_XDP_TX;
>
> I am not the biggest fan of piggy-backing on !!addr to know which
> codepath called us. If the macb_xdp_submit_frame() call in gem_xdp_run()
> ever gives an addr=0 coming from macb_get_addr(bp, desc), then we will
> be submitting NDO typed frames and creating additional DMA mappings
> which would be a really hard to debug bug.
>

I guess we can add a separate boolean, WDYT?

>> +	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
>> +
>> +	/* This is a hard error, log it. */
>> +	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
>> +		       bp->tx_ring_size) < 1) {
>
> Hard wrapped line is not required, it fits in one line.
>

ack to this and all the remaning ones

>> +		netif_stop_subqueue(dev, queue_index);
>> +		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
>> +			   queue->tx_head, queue->tx_tail);
>> +		err = -ENOMEM;
>> +		goto unlock;
>> +	}
>> +
>> +	if (!addr) {
>> +		mapping = dma_map_single(&bp->pdev->dev,
>> +					 xdpf->data,
>> +					 xdpf->len, DMA_TO_DEVICE);
>> +		if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
>> +			err = -ENOMEM;
>> +			goto unlock;
>> +		}
>> +	} else {
>> +		mapping = addr;
>> +		dma_sync_single_for_device(&bp->pdev->dev, mapping,
>> +					   xdpf->len, DMA_BIDIRECTIONAL);
>> +	}
>> +
>> +	unsigned int tx_head = queue->tx_head + 1;
>
> Middle scope variable definition. Weirdly named as it isn't storing the
> current head offset but the future head offset.
>
>> +	ctrl = MACB_BIT(TX_USED);
>> +	desc = macb_tx_desc(queue, tx_head);
>> +	desc->ctrl = ctrl;
>> +
>> +	desc = macb_tx_desc(queue, queue->tx_head);
>> +	tx_buff = macb_tx_buff(queue, queue->tx_head);
>> +	tx_buff->data = xdpf;
>> +	tx_buff->type = buff_type;
>> +	tx_buff->mapping = mapping;
>> +	tx_buff->size = xdpf->len;
>> +	tx_buff->mapped_as_page = false;
>> +
>> +	ctrl = (u32)tx_buff->size;
>> +	ctrl |= MACB_BIT(TX_LAST);
>> +
>> +	if (unlikely(macb_tx_ring_wrap(bp, queue->tx_head) == (bp->tx_ring_size - 1)))
>> +		ctrl |= MACB_BIT(TX_WRAP);
>> +
>> +	/* Set TX buffer descriptor */
>> +	macb_set_addr(bp, desc, tx_buff->mapping);
>> +	/* desc->addr must be visible to hardware before clearing
>> +	 * 'TX_USED' bit in desc->ctrl.
>> +	 */
>> +	wmb();
>> +	desc->ctrl = ctrl;
>> +	queue->tx_head = tx_head;
>> +
>> +	/* Make newly initialized descriptor visible to hardware */
>> +	wmb();
>> +
>> +	spin_lock(&bp->lock);
>> +	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
>> +	spin_unlock(&bp->lock);
>> +
>> +	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
>> +		netif_stop_subqueue(dev, queue_index);
>
> The above 30~40 lines are super similar to macb_start_xmit() &
> macb_tx_map(). They implement almost the same logic; can we avoid the
> duplication?
>
>> +
>> +unlock:
>> +	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
>> +
>> +	if (err)
>> +		release_buff(xdpf, buff_type, 0);
>> +
>> +	return err;
>> +}
>> +
>> +static int
>> +macb_xdp_xmit(struct net_device *dev, int num_frame,
>> +	      struct xdp_frame **frames, u32 flags)
>> +{
>> +	struct macb *bp = netdev_priv(dev);
>> +	u32 xmitted = 0;
>> +	int i;
>> +
>> +	if (!macb_is_gem(bp))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> +		return -EINVAL;
>> +
>> +	for (i = 0; i < num_frame; i++) {
>> +		if (macb_xdp_submit_frame(bp, frames[i], dev, 0))
>> +			break;
>> +
>> +		xmitted++;
>> +	}
>> +
>> +	return xmitted;
>> +}
>> +
>>  static u32 gem_xdp_run(struct macb_queue *queue, struct xdp_buff *xdp,
>> -		       struct net_device *dev)
>> +		       struct net_device *dev, dma_addr_t addr)
>>  {
>>  	struct bpf_prog *prog;
>>  	u32 act = XDP_PASS;
>> @@ -1379,6 +1511,12 @@ static u32 gem_xdp_run(struct macb_queue *queue, struct xdp_buff *xdp,
>>  			break;
>>  		}
>>  		goto out;
>> +	case XDP_TX:
>> +		struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
>> +
>> +		if (!xdpf || macb_xdp_submit_frame(queue->bp, xdpf, dev, addr))
>> +			act = XDP_DROP;
>> +		goto out;
>>  	default:
>>  		bpf_warn_invalid_xdp_action(dev, prog, act);
>>  		fallthrough;
>> @@ -1467,7 +1605,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
>>  				 false);
>>  		xdp_buff_clear_frags_flag(&xdp);
>>  
>> -		ret = gem_xdp_run(queue, &xdp, bp->dev);
>> +		ret = gem_xdp_run(queue, &xdp, bp->dev, addr);
>>  		if (ret == XDP_REDIRECT)
>>  			xdp_flush = true;
>>  
>> @@ -4546,6 +4684,7 @@ static const struct net_device_ops macb_netdev_ops = {
>>  	.ndo_hwtstamp_get	= macb_hwtstamp_get,
>>  	.ndo_setup_tc		= macb_setup_tc,
>>  	.ndo_bpf		= macb_xdp,
>> +	.ndo_xdp_xmit		= macb_xdp_xmit,
>
> I'd expect macb_xdp_xmit() to be called gem_xdp_xmit() as well.
>
> Thanks,
>
> --
> Théo Lebrun, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ