Message-ID: <aWZusgu160VJFqpT@lizhi-Precision-Tower-5810>
Date: Tue, 13 Jan 2026 11:11:30 -0500
From: Frank Li <Frank.li@....com>
To: Wei Fang <wei.fang@....com>
Cc: shenwei.wang@....com, xiaoning.wang@....com, andrew+netdev@...n.ch,
	davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
	pabeni@...hat.com, ast@...nel.org, daniel@...earbox.net,
	hawk@...nel.org, john.fastabend@...il.com, sdf@...ichev.me,
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
	imx@...ts.linux.dev, bpf@...r.kernel.org
Subject: Re: [PATCH net-next 05/11] net: fec: add fec_enet_rx_queue_xdp() for
 XDP path

On Tue, Jan 13, 2026 at 11:29:33AM +0800, Wei Fang wrote:
> Currently, XDP path packets and protocol stack packets are both processed
> in fec_enet_rx_queue(), which makes the logic somewhat confusing and
> debugging more difficult. Furthermore, the two paths do not need all of
> each other's logic: for example, the kernel path does not need to call
> xdp_init_buff(), and the XDP path does not support swap_buffer(). This
> prevents XDP from achieving its maximum performance. Therefore, separate
> the XDP packet processing from fec_enet_rx_queue() into a new
> fec_enet_rx_queue_xdp() function to streamline the XDP path logic and
> improve XDP performance.
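
The net effect (taken from the last hunk below) is that the per-packet
xdp_prog check moves out of the RX loop: fec_enet_rx() reads the program
once and dispatches per queue, roughly:

	if (prog)
		done += fec_enet_rx_queue_xdp(fep, i, budget - done, prog);
	else
		done += fec_enet_rx_queue(fep, i, budget - done);
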
>
> XDP performance on the i.MX93 platform was compared before and after
> applying this patch. The detailed results below show a clear improvement.
>
> Env: i.MX93, packet size 64 bytes including FCS, only a single core and a
> single RX BD ring are used to receive packets, flow-control is off.
>
> Before the patch is applied:
> root@...93evk:~# ./xdp-bench tx eth0
> Summary                   396,868 rx/s                  0 err,drop/s
> Summary                   396,024 rx/s                  0 err,drop/s
> Summary                   402,105 rx/s                  0 err,drop/s
> Summary                   402,501 rx/s                  0 err,drop/s
>
> root@...93evk:~# ./xdp-bench drop eth0
> Summary                   684,781 rx/s                  0 err/s
> Summary                   675,746 rx/s                  0 err/s
> Summary                   667,000 rx/s                  0 err/s
> Summary                   667,960 rx/s                  0 err/s
>
> root@...93evk:~# ./xdp-bench pass eth0
> Summary                   208,552 rx/s                  0 err,drop/s
> Summary                   208,654 rx/s                  0 err,drop/s
> Summary                   208,502 rx/s                  0 err,drop/s
> Summary                   208,797 rx/s                  0 err,drop/s
>
> root@...93evk:~# ./xdp-bench redirect eth0 eth0
> eth0->eth0                311,210 rx/s                  0 err,drop/s      311,208 xmit/s
> eth0->eth0                310,808 rx/s                  0 err,drop/s      310,809 xmit/s
> eth0->eth0                311,340 rx/s                  0 err,drop/s      311,339 xmit/s
> eth0->eth0                312,030 rx/s                  0 err,drop/s      312,031 xmit/s
>
> After the patch is applied:
> root@...93evk:~# ./xdp-bench tx eth0
> Summary                   409,975 rx/s                  0 err,drop/s
> Summary                   411,073 rx/s                  0 err,drop/s
> Summary                   410,940 rx/s                  0 err,drop/s
> Summary                   407,818 rx/s                  0 err,drop/s
>
> root@...93evk:~# ./xdp-bench drop eth0
> Summary                   700,681 rx/s                  0 err/s
> Summary                   698,102 rx/s                  0 err/s
> Summary                   695,025 rx/s                  0 err/s
> Summary                   698,639 rx/s                  0 err/s
>
> root@...93evk:~# ./xdp-bench pass eth0
> Summary                   211,356 rx/s                  0 err,drop/s
> Summary                   210,629 rx/s                  0 err,drop/s
> Summary                   210,395 rx/s                  0 err,drop/s
> Summary                   210,884 rx/s                  0 err,drop/s
>
> root@...93evk:~# ./xdp-bench redirect eth0 eth0
> eth0->eth0                320,351 rx/s                  0 err,drop/s      320,348 xmit/s
> eth0->eth0                318,988 rx/s                  0 err,drop/s      318,988 xmit/s
> eth0->eth0                320,300 rx/s                  0 err,drop/s      320,306 xmit/s
> eth0->eth0                320,156 rx/s                  0 err,drop/s      320,150 xmit/s

Can you keep just one or two test results in the commit message, and
remove the "root@...93evk" prompt?

>
> Signed-off-by: Wei Fang <wei.fang@....com>
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 300 ++++++++++++++--------
>  1 file changed, 189 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 7e8ac9d2a5ff..0b114a68cd8e 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -79,7 +79,7 @@ static void set_multicast_list(struct net_device *ndev);
>  static void fec_enet_itr_coal_set(struct net_device *ndev);
>  static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
>  				int cpu, struct xdp_buff *xdp,
> -				u32 dma_sync_len);
> +				u32 dma_sync_len, int queue);
>
>  #define DRIVER_NAME	"fec"
>
> @@ -1665,71 +1665,6 @@ static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
>  	return 0;
>  }
>
> -static u32
> -fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
> -		 struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu)
> -{
> -	unsigned int sync, len = xdp->data_end - xdp->data;
> -	u32 ret = FEC_ENET_XDP_PASS;
> -	struct page *page;
> -	int err;
> -	u32 act;
> -
> -	act = bpf_prog_run_xdp(prog, xdp);
> -
> -	/* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover
> -	 * max len CPU touch
> -	 */
> -	sync = xdp->data_end - xdp->data;
> -	sync = max(sync, len);
> -
> -	switch (act) {
> -	case XDP_PASS:
> -		rxq->stats[RX_XDP_PASS]++;
> -		ret = FEC_ENET_XDP_PASS;
> -		break;
> -
> -	case XDP_REDIRECT:
> -		rxq->stats[RX_XDP_REDIRECT]++;
> -		err = xdp_do_redirect(fep->netdev, xdp, prog);
> -		if (unlikely(err))
> -			goto xdp_err;
> -
> -		ret = FEC_ENET_XDP_REDIR;
> -		break;
> -
> -	case XDP_TX:
> -		rxq->stats[RX_XDP_TX]++;
> -		err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync);
> -		if (unlikely(err)) {
> -			rxq->stats[RX_XDP_TX_ERRORS]++;
> -			goto xdp_err;
> -		}
> -
> -		ret = FEC_ENET_XDP_TX;
> -		break;
> -
> -	default:
> -		bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
> -		fallthrough;
> -
> -	case XDP_ABORTED:
> -		fallthrough;    /* handle aborts by dropping packet */
> -
> -	case XDP_DROP:
> -		rxq->stats[RX_XDP_DROP]++;
> -xdp_err:
> -		ret = FEC_ENET_XDP_CONSUMED;
> -		page = virt_to_head_page(xdp->data);
> -		page_pool_put_page(rxq->page_pool, page, sync, true);
> -		if (act != XDP_DROP)
> -			trace_xdp_exception(fep->netdev, prog, act);
> -		break;
> -	}
> -
> -	return ret;
> -}
> -
>  static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb)
>  {
>  	if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
> @@ -1839,26 +1774,20 @@ static struct sk_buff *fec_build_skb(struct fec_enet_private *fep,
>   * not been given to the system, we just set the empty indicator,
>   * effectively tossing the packet.
>   */
> -static int
> -fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
> +static int fec_enet_rx_queue(struct fec_enet_private *fep,
> +			     int queue, int budget)
>  {
> -	struct fec_enet_private *fep = netdev_priv(ndev);
> -	struct fec_enet_priv_rx_q *rxq;
> -	struct bufdesc *bdp;
> -	unsigned short status;
> -	struct  sk_buff *skb;
> -	ushort	pkt_len;
> -	int	pkt_received = 0;
> -	int	index = 0;
> -	bool	need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
> -	u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
> -	struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
> -	u32 ret, xdp_result = FEC_ENET_XDP_PASS;
> +	struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
> +	bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
> +	struct net_device *ndev = fep->netdev;
> +	struct bufdesc *bdp = rxq->bd.cur;
>  	u32 sub_len = 4 + fep->rx_shift;
> -	int cpu = smp_processor_id();
> -	struct xdp_buff xdp;
> +	int pkt_received = 0;
> +	u16 status, pkt_len;
> +	struct sk_buff *skb;
>  	struct page *page;
> -	__fec32 cbd_bufaddr;
> +	dma_addr_t dma;
> +	int index;
>
>  #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
>  	/*
> @@ -1867,21 +1796,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
>  	 */
>  	flush_cache_all();
>  #endif
> -	rxq = fep->rx_queue[queue_id];
>
>  	/* First, grab all of the stats for the incoming packet.
>  	 * These get messed up if we get called due to a busy condition.
>  	 */
> -	bdp = rxq->bd.cur;
> -	xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
> -

This patch is quite big, can you split it into smaller ones?


>  	while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
>
>  		if (pkt_received >= budget)
>  			break;
>  		pkt_received++;
>
> -		writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT);
> +		writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);

You can keep queue_id here, or rename it in a separate small patch.

>
>  		/* Check for errors. */
>  		status ^= BD_ENET_RX_LAST;
> @@ -1895,29 +1820,16 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
>
>  		index = fec_enet_get_bd_index(bdp, &rxq->bd);
>  		page = rxq->rx_buf[index];
> -		cbd_bufaddr = bdp->cbd_bufaddr;
> +		dma = fec32_to_cpu(bdp->cbd_bufaddr);
>  		if (fec_enet_update_cbd(rxq, bdp, index)) {
>  			ndev->stats.rx_dropped++;
>  			goto rx_processing_done;
>  		}
>
> -		dma_sync_single_for_cpu(&fep->pdev->dev,
> -					fec32_to_cpu(cbd_bufaddr),
> -					pkt_len,
> +		dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
>  					DMA_FROM_DEVICE);

Same here: adding the local variable dma should be a separate patch.
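
Just to illustrate, the dma refactor on its own would be a small hunk
like this (context approximate):

	-	__fec32 cbd_bufaddr;
	+	dma_addr_t dma;
	...
	-		cbd_bufaddr = bdp->cbd_bufaddr;
	+		dma = fec32_to_cpu(bdp->cbd_bufaddr);
	...
	-		dma_sync_single_for_cpu(&fep->pdev->dev,
	-					fec32_to_cpu(cbd_bufaddr),
	-					pkt_len,
	+		dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
	 					DMA_FROM_DEVICE);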

>  		prefetch(page_address(page));
>
> -		if (xdp_prog) {
> -			xdp_buff_clear_frags_flag(&xdp);
> -			/* subtract 16bit shift and FCS */
> -			xdp_prepare_buff(&xdp, page_address(page),
> -					 data_start, pkt_len - sub_len, false);
> -			ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu);
> -			xdp_result |= ret;
> -			if (ret != FEC_ENET_XDP_PASS)
> -				goto rx_processing_done;
> -		}
> -
>  		if (unlikely(need_swap)) {
>  			u8 *data;
>
> @@ -1964,9 +1876,171 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
>  		 */
>  		writel(0, rxq->bd.reg_desc_active);
>  	}
> +
> +	rxq->bd.cur = bdp;
> +
> +	return pkt_received;
> +}
> +
> +static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq,
> +			 struct xdp_buff *xdp, u32 sync)
> +{
> +	struct page *page = virt_to_head_page(xdp->data);
> +
> +	page_pool_put_page(rxq->page_pool, page, sync, true);
> +}
> +
> +static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue,
> +				 int budget, struct bpf_prog *prog)
> +{

Or create a new patch to duplicate the TX and RX parts first, then change
the RX part.
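
i.e. a first, purely mechanical patch (illustrative sketch only):

	/* verbatim copy of fec_enet_rx_queue(), no behaviour change yet */
	static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep,
					 int queue, int budget,
					 struct bpf_prog *prog)
	{
		...
	}

and then follow-up patches strip the non-XDP logic from the copy and the
XDP logic from the original, so each diff stays small and reviewable.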

Frank

> +	u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
> +	struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
> +	struct net_device *ndev = fep->netdev;
> +	struct bufdesc *bdp = rxq->bd.cur;
> +	u32 sub_len = 4 + fep->rx_shift;
> +	int cpu = smp_processor_id();
> +	int pkt_received = 0;
> +	struct sk_buff *skb;
> +	u16 status, pkt_len;
> +	struct xdp_buff xdp;
> +	struct page *page;
> +	u32 xdp_res = 0;
> +	dma_addr_t dma;
> +	int index, err;
> +	u32 act, sync;
> +
> +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
> +	/*
> +	 * Hacky flush of all caches instead of using the DMA API for the TSO
> +	 * headers.
> +	 */
> +	flush_cache_all();
> +#endif
> +
> +	xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
> +
> +	while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
> +		if (pkt_received >= budget)
> +			break;
> +		pkt_received++;
> +
> +		writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
> +
> +		/* Check for errors. */
> +		status ^= BD_ENET_RX_LAST;
> +		if (unlikely(fec_rx_error_check(ndev, status)))
> +			goto rx_processing_done;
> +
> +		/* Process the incoming frame. */
> +		ndev->stats.rx_packets++;
> +		pkt_len = fec16_to_cpu(bdp->cbd_datlen);
> +		ndev->stats.rx_bytes += pkt_len - fep->rx_shift;
> +
> +		index = fec_enet_get_bd_index(bdp, &rxq->bd);
> +		page = rxq->rx_buf[index];
> +		dma = fec32_to_cpu(bdp->cbd_bufaddr);
> +
> +		if (fec_enet_update_cbd(rxq, bdp, index)) {
> +			ndev->stats.rx_dropped++;
> +			goto rx_processing_done;
> +		}
> +
> +		dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
> +					DMA_FROM_DEVICE);
> +		prefetch(page_address(page));
> +
> +		xdp_buff_clear_frags_flag(&xdp);
> +		/* subtract 16bit shift and FCS */
> +		pkt_len -= sub_len;
> +		xdp_prepare_buff(&xdp, page_address(page), data_start,
> +				 pkt_len, false);
> +
> +		act = bpf_prog_run_xdp(prog, &xdp);
> +		/* Due to xdp_adjust_tail and xdp_adjust_head, the DMA sync
> +		 * for_device must cover the max length the CPU touched.
> +		 */
> +		sync = xdp.data_end - xdp.data;
> +		sync = max(sync, pkt_len);
> +
> +		switch (act) {
> +		case XDP_PASS:
> +			rxq->stats[RX_XDP_PASS]++;
> +			/* The packet length includes FCS, but we don't want to
> +			 * include that when passing upstream as it messes up
> +			 * bridging applications.
> +			 */
> +			skb = fec_build_skb(fep, rxq, bdp, page, pkt_len);
> +			if (!skb) {
> +				fec_xdp_drop(rxq, &xdp, sync);
> +				trace_xdp_exception(ndev, prog, XDP_PASS);
> +			} else {
> +				napi_gro_receive(&fep->napi, skb);
> +			}
> +			break;
> +		case XDP_REDIRECT:
> +			rxq->stats[RX_XDP_REDIRECT]++;
> +			err = xdp_do_redirect(ndev, &xdp, prog);
> +			if (unlikely(err)) {
> +				fec_xdp_drop(rxq, &xdp, sync);
> +				trace_xdp_exception(ndev, prog, XDP_REDIRECT);
> +			} else {
> +				xdp_res |= FEC_ENET_XDP_REDIR;
> +			}
> +			break;
> +		case XDP_TX:
> +			rxq->stats[RX_XDP_TX]++;
> +			err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync, queue);
> +			if (unlikely(err)) {
> +				rxq->stats[RX_XDP_TX_ERRORS]++;
> +				fec_xdp_drop(rxq, &xdp, sync);
> +				trace_xdp_exception(ndev, prog, XDP_TX);
> +			}
> +			break;
> +		default:
> +			bpf_warn_invalid_xdp_action(ndev, prog, act);
> +			fallthrough;
> +		case XDP_ABORTED:
> +			/* handle aborts by dropping packet */
> +			fallthrough;
> +		case XDP_DROP:
> +			rxq->stats[RX_XDP_DROP]++;
> +			fec_xdp_drop(rxq, &xdp, sync);
> +			break;
> +		}
> +
> +rx_processing_done:
> +		/* Clear the status flags for this buffer */
> +		status &= ~BD_ENET_RX_STATS;
> +		/* Mark the buffer empty */
> +		status |= BD_ENET_RX_EMPTY;
> +
> +		if (fep->bufdesc_ex) {
> +			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
> +
> +			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
> +			ebdp->cbd_prot = 0;
> +			ebdp->cbd_bdu = 0;
> +		}
> +
> +		/* Make sure the updates to rest of the descriptor are
> +		 * performed before transferring ownership.
> +		 */
> +		dma_wmb();
> +		bdp->cbd_sc = cpu_to_fec16(status);
> +
> +		/* Update BD pointer to next entry */
> +		bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
> +
> +		/* Doing this here will keep the FEC running while we process
> +		 * incoming frames. On a heavily loaded network, we should be
> +		 * able to keep up at the expense of system resources.
> +		 */
> +		writel(0, rxq->bd.reg_desc_active);
> +	}
> +
>  	rxq->bd.cur = bdp;
>
> -	if (xdp_result & FEC_ENET_XDP_REDIR)
> +	if (xdp_res & FEC_ENET_XDP_REDIR)
>  		xdp_do_flush();
>
>  	return pkt_received;
> @@ -1975,11 +2049,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
>  static int fec_enet_rx(struct net_device *ndev, int budget)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);
>  	int i, done = 0;
>
>  	/* Make sure that AVB queues are processed first. */
> -	for (i = fep->num_rx_queues - 1; i >= 0; i--)
> -		done += fec_enet_rx_queue(ndev, i, budget - done);
> +	for (i = fep->num_rx_queues - 1; i >= 0; i--) {
> +		if (prog)
> +			done += fec_enet_rx_queue_xdp(fep, i, budget - done,
> +						      prog);
> +		else
> +			done += fec_enet_rx_queue(fep, i, budget - done);
> +	}
>
>  	return done;
>  }
> @@ -3961,14 +4041,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
>
>  static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
>  				int cpu, struct xdp_buff *xdp,
> -				u32 dma_sync_len)
> +				u32 dma_sync_len, int queue)
>  {
> -	struct fec_enet_priv_tx_q *txq;
> +	struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue];
>  	struct netdev_queue *nq;
> -	int queue, ret;
> +	int ret;
>
> -	queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> -	txq = fep->tx_queue[queue];
>  	nq = netdev_get_tx_queue(fep->netdev, queue);
>
>  	__netif_tx_lock(nq, cpu);
> --
> 2.34.1
>
