Message-Id: <20260113032939.3705137-6-wei.fang@nxp.com>
Date: Tue, 13 Jan 2026 11:29:33 +0800
From: Wei Fang <wei.fang@....com>
To: shenwei.wang@....com,
xiaoning.wang@....com,
frank.li@....com,
andrew+netdev@...n.ch,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
ast@...nel.org,
daniel@...earbox.net,
hawk@...nel.org,
john.fastabend@...il.com,
sdf@...ichev.me
Cc: netdev@...r.kernel.org,
linux-kernel@...r.kernel.org,
imx@...ts.linux.dev,
bpf@...r.kernel.org
Subject: [PATCH net-next 05/11] net: fec: add fec_enet_rx_queue_xdp() for XDP path

Currently, XDP path packets and protocol stack packets are both processed
in fec_enet_rx_queue(), which makes the logic somewhat confusing and
debugging more difficult. Furthermore, each path carries logic the other
does not need: the kernel path does not need to call xdp_init_buff(), and
the XDP path does not support swap_buffer(). This prevents XDP from
achieving its maximum performance. Therefore, separate XDP packet
processing from fec_enet_rx_queue() by adding a dedicated
fec_enet_rx_queue_xdp() function, which streamlines the XDP path logic
and improves XDP performance.
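
In outline (a simplified sketch of the dispatch added by this patch, not
the full implementation), fec_enet_rx() now selects the RX handler per
queue depending on whether an XDP program is attached:

	struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);

	/* AVB queues are still processed first (highest index down to 0). */
	for (i = fep->num_rx_queues - 1; i >= 0; i--) {
		if (prog)
			done += fec_enet_rx_queue_xdp(fep, i, budget - done, prog);
		else
			done += fec_enet_rx_queue(fep, i, budget - done);
	}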

XDP performance on the i.MX93 platform was compared before and after
applying this patch. The detailed results below show that performance
has improved.

Env: i.MX93, packet size 64 bytes including FCS, only a single core and
a single RX BD ring are used to receive packets, flow control is off.

Before the patch is applied:
root@...93evk:~# ./xdp-bench tx eth0
Summary 396,868 rx/s 0 err,drop/s
Summary 396,024 rx/s 0 err,drop/s
Summary 402,105 rx/s 0 err,drop/s
Summary 402,501 rx/s 0 err,drop/s
root@...93evk:~# ./xdp-bench drop eth0
Summary 684,781 rx/s 0 err/s
Summary 675,746 rx/s 0 err/s
Summary 667,000 rx/s 0 err/s
Summary 667,960 rx/s 0 err/s
root@...93evk:~# ./xdp-bench pass eth0
Summary 208,552 rx/s 0 err,drop/s
Summary 208,654 rx/s 0 err,drop/s
Summary 208,502 rx/s 0 err,drop/s
Summary 208,797 rx/s 0 err,drop/s
root@...93evk:~# ./xdp-bench redirect eth0 eth0
eth0->eth0 311,210 rx/s 0 err,drop/s 311,208 xmit/s
eth0->eth0 310,808 rx/s 0 err,drop/s 310,809 xmit/s
eth0->eth0 311,340 rx/s 0 err,drop/s 311,339 xmit/s
eth0->eth0 312,030 rx/s 0 err,drop/s 312,031 xmit/s
After the patch is applied:
root@...93evk:~# ./xdp-bench tx eth0
Summary 409,975 rx/s 0 err,drop/s
Summary 411,073 rx/s 0 err,drop/s
Summary 410,940 rx/s 0 err,drop/s
Summary 407,818 rx/s 0 err,drop/s
root@...93evk:~# ./xdp-bench drop eth0
Summary 700,681 rx/s 0 err/s
Summary 698,102 rx/s 0 err/s
Summary 695,025 rx/s 0 err/s
Summary 698,639 rx/s 0 err/s
root@...93evk:~# ./xdp-bench pass eth0
Summary 211,356 rx/s 0 err,drop/s
Summary 210,629 rx/s 0 err,drop/s
Summary 210,395 rx/s 0 err,drop/s
Summary 210,884 rx/s 0 err,drop/s
root@...93evk:~# ./xdp-bench redirect eth0 eth0
eth0->eth0 320,351 rx/s 0 err,drop/s 320,348 xmit/s
eth0->eth0 318,988 rx/s 0 err,drop/s 318,988 xmit/s
eth0->eth0 320,300 rx/s 0 err,drop/s 320,306 xmit/s
eth0->eth0 320,156 rx/s 0 err,drop/s 320,150 xmit/s
Signed-off-by: Wei Fang <wei.fang@....com>
---
drivers/net/ethernet/freescale/fec_main.c | 300 ++++++++++++++--------
1 file changed, 189 insertions(+), 111 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 7e8ac9d2a5ff..0b114a68cd8e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -79,7 +79,7 @@ static void set_multicast_list(struct net_device *ndev);
static void fec_enet_itr_coal_set(struct net_device *ndev);
static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
int cpu, struct xdp_buff *xdp,
- u32 dma_sync_len);
+ u32 dma_sync_len, int queue);
#define DRIVER_NAME "fec"
@@ -1665,71 +1665,6 @@ static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
return 0;
}
-static u32
-fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
- struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu)
-{
- unsigned int sync, len = xdp->data_end - xdp->data;
- u32 ret = FEC_ENET_XDP_PASS;
- struct page *page;
- int err;
- u32 act;
-
- act = bpf_prog_run_xdp(prog, xdp);
-
- /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover
- * max len CPU touch
- */
- sync = xdp->data_end - xdp->data;
- sync = max(sync, len);
-
- switch (act) {
- case XDP_PASS:
- rxq->stats[RX_XDP_PASS]++;
- ret = FEC_ENET_XDP_PASS;
- break;
-
- case XDP_REDIRECT:
- rxq->stats[RX_XDP_REDIRECT]++;
- err = xdp_do_redirect(fep->netdev, xdp, prog);
- if (unlikely(err))
- goto xdp_err;
-
- ret = FEC_ENET_XDP_REDIR;
- break;
-
- case XDP_TX:
- rxq->stats[RX_XDP_TX]++;
- err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync);
- if (unlikely(err)) {
- rxq->stats[RX_XDP_TX_ERRORS]++;
- goto xdp_err;
- }
-
- ret = FEC_ENET_XDP_TX;
- break;
-
- default:
- bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
- fallthrough;
-
- case XDP_ABORTED:
- fallthrough; /* handle aborts by dropping packet */
-
- case XDP_DROP:
- rxq->stats[RX_XDP_DROP]++;
-xdp_err:
- ret = FEC_ENET_XDP_CONSUMED;
- page = virt_to_head_page(xdp->data);
- page_pool_put_page(rxq->page_pool, page, sync, true);
- if (act != XDP_DROP)
- trace_xdp_exception(fep->netdev, prog, act);
- break;
- }
-
- return ret;
-}
-
static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb)
{
if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
@@ -1839,26 +1774,20 @@ static struct sk_buff *fec_build_skb(struct fec_enet_private *fep,
* not been given to the system, we just set the empty indicator,
* effectively tossing the packet.
*/
-static int
-fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
+static int fec_enet_rx_queue(struct fec_enet_private *fep,
+ int queue, int budget)
{
- struct fec_enet_private *fep = netdev_priv(ndev);
- struct fec_enet_priv_rx_q *rxq;
- struct bufdesc *bdp;
- unsigned short status;
- struct sk_buff *skb;
- ushort pkt_len;
- int pkt_received = 0;
- int index = 0;
- bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
- u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
- struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
- u32 ret, xdp_result = FEC_ENET_XDP_PASS;
+ struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
+ bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
+ struct net_device *ndev = fep->netdev;
+ struct bufdesc *bdp = rxq->bd.cur;
u32 sub_len = 4 + fep->rx_shift;
- int cpu = smp_processor_id();
- struct xdp_buff xdp;
+ int pkt_received = 0;
+ u16 status, pkt_len;
+ struct sk_buff *skb;
struct page *page;
- __fec32 cbd_bufaddr;
+ dma_addr_t dma;
+ int index;
#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
/*
@@ -1867,21 +1796,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
*/
flush_cache_all();
#endif
- rxq = fep->rx_queue[queue_id];
/* First, grab all of the stats for the incoming packet.
* These get messed up if we get called due to a busy condition.
*/
- bdp = rxq->bd.cur;
- xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
-
while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
if (pkt_received >= budget)
break;
pkt_received++;
- writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT);
+ writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
/* Check for errors. */
status ^= BD_ENET_RX_LAST;
@@ -1895,29 +1820,16 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
index = fec_enet_get_bd_index(bdp, &rxq->bd);
page = rxq->rx_buf[index];
- cbd_bufaddr = bdp->cbd_bufaddr;
+ dma = fec32_to_cpu(bdp->cbd_bufaddr);
if (fec_enet_update_cbd(rxq, bdp, index)) {
ndev->stats.rx_dropped++;
goto rx_processing_done;
}
- dma_sync_single_for_cpu(&fep->pdev->dev,
- fec32_to_cpu(cbd_bufaddr),
- pkt_len,
+ dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
DMA_FROM_DEVICE);
prefetch(page_address(page));
- if (xdp_prog) {
- xdp_buff_clear_frags_flag(&xdp);
- /* subtract 16bit shift and FCS */
- xdp_prepare_buff(&xdp, page_address(page),
- data_start, pkt_len - sub_len, false);
- ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu);
- xdp_result |= ret;
- if (ret != FEC_ENET_XDP_PASS)
- goto rx_processing_done;
- }
-
if (unlikely(need_swap)) {
u8 *data;
@@ -1964,9 +1876,171 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
*/
writel(0, rxq->bd.reg_desc_active);
}
+
+ rxq->bd.cur = bdp;
+
+ return pkt_received;
+}
+
+static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq,
+ struct xdp_buff *xdp, u32 sync)
+{
+ struct page *page = virt_to_head_page(xdp->data);
+
+ page_pool_put_page(rxq->page_pool, page, sync, true);
+}
+
+static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue,
+ int budget, struct bpf_prog *prog)
+{
+ u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
+ struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
+ struct net_device *ndev = fep->netdev;
+ struct bufdesc *bdp = rxq->bd.cur;
+ u32 sub_len = 4 + fep->rx_shift;
+ int cpu = smp_processor_id();
+ int pkt_received = 0;
+ struct sk_buff *skb;
+ u16 status, pkt_len;
+ struct xdp_buff xdp;
+ struct page *page;
+ u32 xdp_res = 0;
+ dma_addr_t dma;
+ int index, err;
+ u32 act, sync;
+
+#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
+ /*
+ * Hacky flush of all caches instead of using the DMA API for the TSO
+ * headers.
+ */
+ flush_cache_all();
+#endif
+
+ xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
+
+ while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
+ if (pkt_received >= budget)
+ break;
+ pkt_received++;
+
+ writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
+
+ /* Check for errors. */
+ status ^= BD_ENET_RX_LAST;
+ if (unlikely(fec_rx_error_check(ndev, status)))
+ goto rx_processing_done;
+
+ /* Process the incoming frame. */
+ ndev->stats.rx_packets++;
+ pkt_len = fec16_to_cpu(bdp->cbd_datlen);
+ ndev->stats.rx_bytes += pkt_len - fep->rx_shift;
+
+ index = fec_enet_get_bd_index(bdp, &rxq->bd);
+ page = rxq->rx_buf[index];
+ dma = fec32_to_cpu(bdp->cbd_bufaddr);
+
+ if (fec_enet_update_cbd(rxq, bdp, index)) {
+ ndev->stats.rx_dropped++;
+ goto rx_processing_done;
+ }
+
+ dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
+ DMA_FROM_DEVICE);
+ prefetch(page_address(page));
+
+ xdp_buff_clear_frags_flag(&xdp);
+ /* subtract 16bit shift and FCS */
+ pkt_len -= sub_len;
+ xdp_prepare_buff(&xdp, page_address(page), data_start,
+ pkt_len, false);
+
+ act = bpf_prog_run_xdp(prog, &xdp);
+ /* Due to xdp_adjust_tail and xdp_adjust_head, the DMA sync
+ * for_device must cover the max length the CPU touched.
+ */
+ sync = xdp.data_end - xdp.data;
+ sync = max(sync, pkt_len);
+
+ switch (act) {
+ case XDP_PASS:
+ rxq->stats[RX_XDP_PASS]++;
+ /* The packet length includes FCS, but we don't want to
+ * include that when passing upstream as it messes up
+ * bridging applications.
+ */
+ skb = fec_build_skb(fep, rxq, bdp, page, pkt_len);
+ if (!skb) {
+ fec_xdp_drop(rxq, &xdp, sync);
+ trace_xdp_exception(ndev, prog, XDP_PASS);
+ } else {
+ napi_gro_receive(&fep->napi, skb);
+ }
+ break;
+ case XDP_REDIRECT:
+ rxq->stats[RX_XDP_REDIRECT]++;
+ err = xdp_do_redirect(ndev, &xdp, prog);
+ if (unlikely(err)) {
+ fec_xdp_drop(rxq, &xdp, sync);
+ trace_xdp_exception(ndev, prog, XDP_REDIRECT);
+ } else {
+ xdp_res |= FEC_ENET_XDP_REDIR;
+ }
+ break;
+ case XDP_TX:
+ rxq->stats[RX_XDP_TX]++;
+ err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync, queue);
+ if (unlikely(err)) {
+ rxq->stats[RX_XDP_TX_ERRORS]++;
+ fec_xdp_drop(rxq, &xdp, sync);
+ trace_xdp_exception(ndev, prog, XDP_TX);
+ }
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(ndev, prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ /* handle aborts by dropping packet */
+ fallthrough;
+ case XDP_DROP:
+ rxq->stats[RX_XDP_DROP]++;
+ fec_xdp_drop(rxq, &xdp, sync);
+ break;
+ }
+
+rx_processing_done:
+ /* Clear the status flags for this buffer */
+ status &= ~BD_ENET_RX_STATS;
+ /* Mark the buffer empty */
+ status |= BD_ENET_RX_EMPTY;
+
+ if (fep->bufdesc_ex) {
+ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+ ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
+ ebdp->cbd_prot = 0;
+ ebdp->cbd_bdu = 0;
+ }
+
+ /* Make sure the updates to rest of the descriptor are
+ * performed before transferring ownership.
+ */
+ dma_wmb();
+ bdp->cbd_sc = cpu_to_fec16(status);
+
+ /* Update BD pointer to next entry */
+ bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
+
+ /* Doing this here will keep the FEC running while we process
+ * incoming frames. On a heavily loaded network, we should be
+ * able to keep up at the expense of system resources.
+ */
+ writel(0, rxq->bd.reg_desc_active);
+ }
+
rxq->bd.cur = bdp;
- if (xdp_result & FEC_ENET_XDP_REDIR)
+ if (xdp_res & FEC_ENET_XDP_REDIR)
xdp_do_flush();
return pkt_received;
@@ -1975,11 +2049,17 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget)
static int fec_enet_rx(struct net_device *ndev, int budget)
{
struct fec_enet_private *fep = netdev_priv(ndev);
+ struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);
int i, done = 0;
/* Make sure that AVB queues are processed first. */
- for (i = fep->num_rx_queues - 1; i >= 0; i--)
- done += fec_enet_rx_queue(ndev, i, budget - done);
+ for (i = fep->num_rx_queues - 1; i >= 0; i--) {
+ if (prog)
+ done += fec_enet_rx_queue_xdp(fep, i, budget - done,
+ prog);
+ else
+ done += fec_enet_rx_queue(fep, i, budget - done);
+ }
return done;
}
@@ -3961,14 +4041,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
int cpu, struct xdp_buff *xdp,
- u32 dma_sync_len)
+ u32 dma_sync_len, int queue)
{
- struct fec_enet_priv_tx_q *txq;
+ struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue];
struct netdev_queue *nq;
- int queue, ret;
+ int ret;
- queue = fec_enet_xdp_get_tx_queue(fep, cpu);
- txq = fep->tx_queue[queue];
nq = netdev_get_tx_queue(fep->netdev, queue);
__netif_tx_lock(nq, cpu);
--
2.34.1