Message-ID: <aWZusgu160VJFqpT@lizhi-Precision-Tower-5810>
Date: Tue, 13 Jan 2026 11:11:30 -0500
From: Frank Li <Frank.li@....com>
To: Wei Fang <wei.fang@....com>
Cc: shenwei.wang@....com, xiaoning.wang@....com, andrew+netdev@...n.ch,
davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, ast@...nel.org, daniel@...earbox.net,
hawk@...nel.org, john.fastabend@...il.com, sdf@...ichev.me,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
imx@...ts.linux.dev, bpf@...r.kernel.org
Subject: Re: [PATCH net-next 05/11] net: fec: add fec_enet_rx_queue_xdp() for
XDP path
On Tue, Jan 13, 2026 at 11:29:33AM +0800, Wei Fang wrote:
> Currently, the processing of XDP path packets and protocol stack packets
> is mixed together in fec_enet_rx_queue(), which makes the logic somewhat
> confusing and debugging more difficult. Furthermore, each path contains
> logic the other does not need. For example, the kernel path does not need
> to call xdp_init_buff(), and the XDP path does not support swap_buffer().
> This prevents XDP from achieving its maximum performance. Therefore, XDP
> path packet processing has been separated from fec_enet_rx_queue() by
> adding the fec_enet_rx_queue_xdp() function, which optimizes the XDP path
> logic and improves XDP performance.
>
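(Side note, condensed from the last hunk of this diff: the resulting RX
dispatch ends up looking roughly as below. This is a simplified sketch of
what the patch does, not the exact submitted code.)

    static int fec_enet_rx(struct net_device *ndev, int budget)
    {
            struct fec_enet_private *fep = netdev_priv(ndev);
            struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);
            int i, done = 0;

            /* AVB queues are processed first. Whether an XDP program is
             * attached is checked once per NAPI poll, and each queue is
             * then handled by either the XDP-only or the stack-only loop.
             */
            for (i = fep->num_rx_queues - 1; i >= 0; i--) {
                    if (prog)
                            done += fec_enet_rx_queue_xdp(fep, i,
                                                          budget - done,
                                                          prog);
                    else
                            done += fec_enet_rx_queue(fep, i,
                                                      budget - done);
            }

            return done;
    }
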
> XDP performance on the i.MX93 platform was compared before and after
> applying this patch. Detailed results are below, and they show that
> performance has improved.
>
> Env: i.MX93, packet size 64 bytes including FCS, only a single core and
> a single RX BD ring are used to receive packets, flow-control is off.
>
> Before the patch is applied:
> root@...93evk:~# ./xdp-bench tx eth0
> Summary 396,868 rx/s 0 err,drop/s
> Summary 396,024 rx/s 0 err,drop/s
> Summary 402,105 rx/s 0 err,drop/s
> Summary 402,501 rx/s 0 err,drop/s
>
> root@...93evk:~# ./xdp-bench drop eth0
> Summary 684,781 rx/s 0 err/s
> Summary 675,746 rx/s 0 err/s
> Summary 667,000 rx/s 0 err/s
> Summary 667,960 rx/s 0 err/s
>
> root@...93evk:~# ./xdp-bench pass eth0
> Summary 208,552 rx/s 0 err,drop/s
> Summary 208,654 rx/s 0 err,drop/s
> Summary 208,502 rx/s 0 err,drop/s
> Summary 208,797 rx/s 0 err,drop/s
>
> root@...93evk:~# ./xdp-bench redirect eth0 eth0
> eth0->eth0 311,210 rx/s 0 err,drop/s 311,208 xmit/s
> eth0->eth0 310,808 rx/s 0 err,drop/s 310,809 xmit/s
> eth0->eth0 311,340 rx/s 0 err,drop/s 311,339 xmit/s
> eth0->eth0 312,030 rx/s 0 err,drop/s 312,031 xmit/s
>
> After the patch is applied:
> root@...93evk:~# ./xdp-bench tx eth0
> Summary 409,975 rx/s 0 err,drop/s
> Summary 411,073 rx/s 0 err,drop/s
> Summary 410,940 rx/s 0 err,drop/s
> Summary 407,818 rx/s 0 err,drop/s
>
> root@...93evk:~# ./xdp-bench drop eth0
> Summary 700,681 rx/s 0 err/s
> Summary 698,102 rx/s 0 err/s
> Summary 695,025 rx/s 0 err/s
> Summary 698,639 rx/s 0 err/s
>
> root@...93evk:~# ./xdp-bench pass eth0
> Summary 211,356 rx/s 0 err,drop/s
> Summary 210,629 rx/s 0 err,drop/s
> Summary 210,395 rx/s 0 err,drop/s
> Summary 210,884 rx/s 0 err,drop/s
>
> root@...93evk:~# ./xdp-bench redirect eth0 eth0
> eth0->eth0 320,351 rx/s 0 err,drop/s 320,348 xmit/s
> eth0->eth0 318,988 rx/s 0 err,drop/s 318,988 xmit/s
> eth0->eth0 320,300 rx/s 0 err,drop/s 320,306 xmit/s
> eth0->eth0 320,156 rx/s 0 err,drop/s 320,150 xmit/s
Can you keep just one or two test results in the commit message, and
remove "root@...93evk"?
>
> Signed-off-by: Wei Fang <wei.fang@....com>
> ---
> drivers/net/ethernet/freescale/fec_main.c | 300 ++++++++++++++--------
> 1 file changed, 189 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 7e8ac9d2a5ff..0b114a68cd8e 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -79,7 +79,7 @@ static void set_multicast_list(struct net_device *ndev);
> static void fec_enet_itr_coal_set(struct net_device *ndev);
> static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
> int cpu, struct xdp_buff *xdp,
> - u32 dma_sync_len);
> + u32 dma_sync_len, int queue);
>
> #define DRIVER_NAME "fec"
>
> @@ -1665,71 +1665,6 @@ static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> return 0;
> }
>
> -static u32
> -fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
> - struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu)
> -{
> - unsigned int sync, len = xdp->data_end - xdp->data;
> - u32 ret = FEC_ENET_XDP_PASS;
> - struct page *page;
> - int err;
> - u32 act;
> -
> - act = bpf_prog_run_xdp(prog, xdp);
> -
> - /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover
> - * max len CPU touch
> - */
> - sync = xdp->data_end - xdp->data;
> - sync = max(sync, len);
> -
> - switch (act) {
> - case XDP_PASS:
> - rxq->stats[RX_XDP_PASS]++;
> - ret = FEC_ENET_XDP_PASS;
> - break;
> -
> - case XDP_REDIRECT:
> - rxq->stats[RX_XDP_REDIRECT]++;
> - err = xdp_do_redirect(fep->netdev, xdp, prog);
> - if (unlikely(err))
> - goto xdp_err;
> -
> - ret = FEC_ENET_XDP_REDIR;
> - break;
> -
> - case XDP_TX:
> - rxq->stats[RX_XDP_TX]++;
> - err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync);
> - if (unlikely(err)) {
> - rxq->stats[RX_XDP_TX_ERRORS]++;
> - goto xdp_err;
> - }
> -
> - ret = FEC_ENET_XDP_TX;
> - break;
> -
> - default:
> - bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
> - fallthrough;
> -
> - case XDP_ABORTED:
> - fallthrough; /* handle aborts by dropping packet */
> -
> - case XDP_DROP:
> - rxq->stats[RX_XDP_DROP]++;
> -xdp_err:
> - ret = FEC_ENET_XDP_CONSUMED;
> - page = virt_to_head_page(xdp->data);
> - page_pool_put_page(rxq->page_pool, page, sync, true);
> - if (act != XDP_DROP)
> - trace_xdp_exception(fep->netdev, prog, act);
> - break;
> - }
> -
> - return ret;
> -}
> -
> static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb)
> {
> if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
> @@ -1839,26 +1774,20 @@ static struct sk_buff *fec_build_skb(struct fec_enet_private *fep,
> * not been given to the system, we just set the empty indicator,
> * effectively tossing the packet.
> */
> -static int
> -fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
> +static int fec_enet_rx_queue(struct fec_enet_private *fep,
> + int queue, int budget)
> {
> - struct fec_enet_private *fep = netdev_priv(ndev);
> - struct fec_enet_priv_rx_q *rxq;
> - struct bufdesc *bdp;
> - unsigned short status;
> - struct sk_buff *skb;
> - ushort pkt_len;
> - int pkt_received = 0;
> - int index = 0;
> - bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
> - u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
> - struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
> - u32 ret, xdp_result = FEC_ENET_XDP_PASS;
> + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
> + bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
> + struct net_device *ndev = fep->netdev;
> + struct bufdesc *bdp = rxq->bd.cur;
> u32 sub_len = 4 + fep->rx_shift;
> - int cpu = smp_processor_id();
> - struct xdp_buff xdp;
> + int pkt_received = 0;
> + u16 status, pkt_len;
> + struct sk_buff *skb;
> struct page *page;
> - __fec32 cbd_bufaddr;
> + dma_addr_t dma;
> + int index;
>
> #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
> /*
> @@ -1867,21 +1796,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
> */
> flush_cache_all();
> #endif
> - rxq = fep->rx_queue[queue_id];
>
> /* First, grab all of the stats for the incoming packet.
> * These get messed up if we get called due to a busy condition.
> */
> - bdp = rxq->bd.cur;
> - xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
> -
This patch is quite big, can you split it into smaller ones?
> while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
>
> if (pkt_received >= budget)
> break;
> pkt_received++;
>
> - writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT);
> + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
You can keep queue_id, or rename it in a separate small patch.
>
> /* Check for errors. */
> status ^= BD_ENET_RX_LAST;
> @@ -1895,29 +1820,16 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
>
> index = fec_enet_get_bd_index(bdp, &rxq->bd);
> page = rxq->rx_buf[index];
> - cbd_bufaddr = bdp->cbd_bufaddr;
> + dma = fec32_to_cpu(bdp->cbd_bufaddr);
> if (fec_enet_update_cbd(rxq, bdp, index)) {
> ndev->stats.rx_dropped++;
> goto rx_processing_done;
> }
>
> - dma_sync_single_for_cpu(&fep->pdev->dev,
> - fec32_to_cpu(cbd_bufaddr),
> - pkt_len,
> + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
> DMA_FROM_DEVICE);
Same here, adding the local variable dma should be in a new patch.
> prefetch(page_address(page));
>
> - if (xdp_prog) {
> - xdp_buff_clear_frags_flag(&xdp);
> - /* subtract 16bit shift and FCS */
> - xdp_prepare_buff(&xdp, page_address(page),
> - data_start, pkt_len - sub_len, false);
> - ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu);
> - xdp_result |= ret;
> - if (ret != FEC_ENET_XDP_PASS)
> - goto rx_processing_done;
> - }
> -
> if (unlikely(need_swap)) {
> u8 *data;
>
> @@ -1964,9 +1876,171 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
> */
> writel(0, rxq->bd.reg_desc_active);
> }
> +
> + rxq->bd.cur = bdp;
> +
> + return pkt_received;
> +}
> +
> +static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq,
> + struct xdp_buff *xdp, u32 sync)
> +{
> + struct page *page = virt_to_head_page(xdp->data);
> +
> + page_pool_put_page(rxq->page_pool, page, sync, true);
> +}
> +
> +static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue,
> + int budget, struct bpf_prog *prog)
> +{
Or create a new patch that duplicates the TX and RX paths first, then
change the RX part.

Frank
> + u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
> + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
> + struct net_device *ndev = fep->netdev;
> + struct bufdesc *bdp = rxq->bd.cur;
> + u32 sub_len = 4 + fep->rx_shift;
> + int cpu = smp_processor_id();
> + int pkt_received = 0;
> + struct sk_buff *skb;
> + u16 status, pkt_len;
> + struct xdp_buff xdp;
> + struct page *page;
> + u32 xdp_res = 0;
> + dma_addr_t dma;
> + int index, err;
> + u32 act, sync;
> +
> +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
> + /*
> + * Hacky flush of all caches instead of using the DMA API for the TSO
> + * headers.
> + */
> + flush_cache_all();
> +#endif
> +
> + xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
> +
> + while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
> + if (pkt_received >= budget)
> + break;
> + pkt_received++;
> +
> + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
> +
> + /* Check for errors. */
> + status ^= BD_ENET_RX_LAST;
> + if (unlikely(fec_rx_error_check(ndev, status)))
> + goto rx_processing_done;
> +
> + /* Process the incoming frame. */
> + ndev->stats.rx_packets++;
> + pkt_len = fec16_to_cpu(bdp->cbd_datlen);
> + ndev->stats.rx_bytes += pkt_len - fep->rx_shift;
> +
> + index = fec_enet_get_bd_index(bdp, &rxq->bd);
> + page = rxq->rx_buf[index];
> + dma = fec32_to_cpu(bdp->cbd_bufaddr);
> +
> + if (fec_enet_update_cbd(rxq, bdp, index)) {
> + ndev->stats.rx_dropped++;
> + goto rx_processing_done;
> + }
> +
> + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
> + DMA_FROM_DEVICE);
> + prefetch(page_address(page));
> +
> + xdp_buff_clear_frags_flag(&xdp);
> + /* subtract 16bit shift and FCS */
> + pkt_len -= sub_len;
> + xdp_prepare_buff(&xdp, page_address(page), data_start,
> + pkt_len, false);
> +
> + act = bpf_prog_run_xdp(prog, &xdp);
> + /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync
> + * for_device cover max len CPU touch.
> + */
> + sync = xdp.data_end - xdp.data;
> + sync = max(sync, pkt_len);
> +
> + switch (act) {
> + case XDP_PASS:
> + rxq->stats[RX_XDP_PASS]++;
> + /* The packet length includes FCS, but we don't want to
> + * include that when passing upstream as it messes up
> + * bridging applications.
> + */
> + skb = fec_build_skb(fep, rxq, bdp, page, pkt_len);
> + if (!skb) {
> + fec_xdp_drop(rxq, &xdp, sync);
> + trace_xdp_exception(ndev, prog, XDP_PASS);
> + } else {
> + napi_gro_receive(&fep->napi, skb);
> + }
> + break;
> + case XDP_REDIRECT:
> + rxq->stats[RX_XDP_REDIRECT]++;
> + err = xdp_do_redirect(ndev, &xdp, prog);
> + if (unlikely(err)) {
> + fec_xdp_drop(rxq, &xdp, sync);
> + trace_xdp_exception(ndev, prog, XDP_REDIRECT);
> + } else {
> + xdp_res |= FEC_ENET_XDP_REDIR;
> + }
> + break;
> + case XDP_TX:
> + rxq->stats[RX_XDP_TX]++;
> + err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync, queue);
> + if (unlikely(err)) {
> + rxq->stats[RX_XDP_TX_ERRORS]++;
> + fec_xdp_drop(rxq, &xdp, sync);
> + trace_xdp_exception(ndev, prog, XDP_TX);
> + }
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(ndev, prog, act);
> + fallthrough;
> + case XDP_ABORTED:
> + /* handle aborts by dropping packet */
> + fallthrough;
> + case XDP_DROP:
> + rxq->stats[RX_XDP_DROP]++;
> + fec_xdp_drop(rxq, &xdp, sync);
> + break;
> + }
> +
> +rx_processing_done:
> + /* Clear the status flags for this buffer */
> + status &= ~BD_ENET_RX_STATS;
> + /* Mark the buffer empty */
> + status |= BD_ENET_RX_EMPTY;
> +
> + if (fep->bufdesc_ex) {
> + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
> +
> + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
> + ebdp->cbd_prot = 0;
> + ebdp->cbd_bdu = 0;
> + }
> +
> + /* Make sure the updates to rest of the descriptor are
> + * performed before transferring ownership.
> + */
> + dma_wmb();
> + bdp->cbd_sc = cpu_to_fec16(status);
> +
> + /* Update BD pointer to next entry */
> + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
> +
> + /* Doing this here will keep the FEC running while we process
> + * incoming frames. On a heavily loaded network, we should be
> + * able to keep up at the expense of system resources.
> + */
> + writel(0, rxq->bd.reg_desc_active);
> + }
> +
> rxq->bd.cur = bdp;
>
> - if (xdp_result & FEC_ENET_XDP_REDIR)
> + if (xdp_res & FEC_ENET_XDP_REDIR)
> xdp_do_flush();
>
> return pkt_received;
> @@ -1975,11 +2049,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
> static int fec_enet_rx(struct net_device *ndev, int budget)
> {
> struct fec_enet_private *fep = netdev_priv(ndev);
> + struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);
> int i, done = 0;
>
> /* Make sure that AVB queues are processed first. */
> - for (i = fep->num_rx_queues - 1; i >= 0; i--)
> - done += fec_enet_rx_queue(ndev, i, budget - done);
> + for (i = fep->num_rx_queues - 1; i >= 0; i--) {
> + if (prog)
> + done += fec_enet_rx_queue_xdp(fep, i, budget - done,
> + prog);
> + else
> + done += fec_enet_rx_queue(fep, i, budget - done);
> + }
>
> return done;
> }
> @@ -3961,14 +4041,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
>
> static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
> int cpu, struct xdp_buff *xdp,
> - u32 dma_sync_len)
> + u32 dma_sync_len, int queue)
> {
> - struct fec_enet_priv_tx_q *txq;
> + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue];
> struct netdev_queue *nq;
> - int queue, ret;
> + int ret;
>
> - queue = fec_enet_xdp_get_tx_queue(fep, cpu);
> - txq = fep->tx_queue[queue];
> nq = netdev_get_tx_queue(fep->netdev, queue);
>
> __netif_tx_lock(nq, cpu);
> --
> 2.34.1
>