[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220930193751.1249054-1-shenwei.wang@nxp.com>
Date: Fri, 30 Sep 2022 14:37:51 -0500
From: Shenwei Wang <shenwei.wang@....com>
To: "David S . Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew@...n.ch>
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Wei Fang <wei.fang@....com>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, imx@...ts.linux.dev,
Shenwei Wang <shenwei.wang@....com>
Subject: [PATCH 1/1] net: fec: using page pool to manage RX buffers
This patch optimizes the RX buffer management by using the page
pool. The purpose for this change is to prepare for the following
XDP support. The current driver uses one frame per page for easy
management.
The following are the comparing result between page pool implementation
and the original implementation (non page pool).
--- Page Pool implementation ----
shenwei@...0:~$ iperf -c 10.81.16.245 -w 2m -i 1
------------------------------------------------------------
Client connecting to 10.81.16.245, TCP port 5001
TCP window size: 416 KByte (WARNING: requested 1.91 MByte)
------------------------------------------------------------
[ 1] local 10.81.17.20 port 43204 connected with 10.81.16.245 port 5001
[ ID] Interval Transfer Bandwidth
[ 1] 0.0000-1.0000 sec 111 MBytes 933 Mbits/sec
[ 1] 1.0000-2.0000 sec 111 MBytes 934 Mbits/sec
[ 1] 2.0000-3.0000 sec 112 MBytes 935 Mbits/sec
[ 1] 3.0000-4.0000 sec 111 MBytes 933 Mbits/sec
[ 1] 4.0000-5.0000 sec 111 MBytes 934 Mbits/sec
[ 1] 5.0000-6.0000 sec 111 MBytes 933 Mbits/sec
[ 1] 6.0000-7.0000 sec 111 MBytes 931 Mbits/sec
[ 1] 7.0000-8.0000 sec 112 MBytes 935 Mbits/sec
[ 1] 8.0000-9.0000 sec 111 MBytes 933 Mbits/sec
[ 1] 9.0000-10.0000 sec 112 MBytes 935 Mbits/sec
[ 1] 0.0000-10.0077 sec 1.09 GBytes 933 Mbits/sec
--- Non Page Pool implementation ----
shenwei@...0:~$ iperf -c 10.81.16.245 -w 2m -i 1
------------------------------------------------------------
Client connecting to 10.81.16.245, TCP port 5001
TCP window size: 416 KByte (WARNING: requested 1.91 MByte)
------------------------------------------------------------
[ 1] local 10.81.17.20 port 49154 connected with 10.81.16.245 port 5001
[ ID] Interval Transfer Bandwidth
[ 1] 0.0000-1.0000 sec 104 MBytes 868 Mbits/sec
[ 1] 1.0000-2.0000 sec 105 MBytes 878 Mbits/sec
[ 1] 2.0000-3.0000 sec 105 MBytes 881 Mbits/sec
[ 1] 3.0000-4.0000 sec 105 MBytes 879 Mbits/sec
[ 1] 4.0000-5.0000 sec 105 MBytes 878 Mbits/sec
[ 1] 5.0000-6.0000 sec 105 MBytes 878 Mbits/sec
[ 1] 6.0000-7.0000 sec 104 MBytes 875 Mbits/sec
[ 1] 7.0000-8.0000 sec 104 MBytes 875 Mbits/sec
[ 1] 8.0000-9.0000 sec 104 MBytes 873 Mbits/sec
[ 1] 9.0000-10.0000 sec 104 MBytes 875 Mbits/sec
[ 1] 0.0000-10.0073 sec 1.02 GBytes 875 Mbits/sec
Signed-off-by: Shenwei Wang <shenwei.wang@....com>
---
drivers/net/ethernet/freescale/Kconfig | 1 +
drivers/net/ethernet/freescale/fec.h | 35 ++++-
drivers/net/ethernet/freescale/fec_main.c | 155 ++++++++++++++--------
3 files changed, 134 insertions(+), 57 deletions(-)
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index b7bf45cec29d..ce866ae3df03 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -28,6 +28,7 @@ config FEC
depends on PTP_1588_CLOCK_OPTIONAL
select CRC32
select PHYLIB
+ select PAGE_POOL
imply NET_SELFTESTS
help
Say Y here if you want to use the built-in 10/100 Fast ethernet
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index b0100fe3c9e4..7be56cc1ddb4 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/net_tstamp.h>
#include <linux/pm_qos.h>
+#include <linux/bpf.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/timecounter.h>
#include <dt-bindings/firmware/imx/rsrc.h>
@@ -346,8 +347,11 @@ struct bufdesc_ex {
* the skbuffer directly.
*/
+#define FEC_ENET_XDP_HEADROOM (XDP_PACKET_HEADROOM)
+
#define FEC_ENET_RX_PAGES 256
-#define FEC_ENET_RX_FRSIZE 2048
+#define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_ENET_XDP_HEADROOM \
+ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
#define FEC_ENET_TX_FRSIZE 2048
@@ -517,6 +521,22 @@ struct bufdesc_prop {
unsigned char dsize_log2;
};
+struct fec_enet_priv_txrx_info {
+ int offset;
+ struct page *page;
+ struct sk_buff *skb;
+};
+
+struct fec_enet_xdp_stats {
+ u64 xdp_pass;
+ u64 xdp_drop;
+ u64 xdp_xmit;
+ u64 xdp_redirect;
+ u64 xdp_xmit_err;
+ u64 xdp_tx;
+ u64 xdp_tx_err;
+};
+
struct fec_enet_priv_tx_q {
struct bufdesc_prop bd;
unsigned char *tx_bounce[TX_RING_SIZE];
@@ -532,7 +552,15 @@ struct fec_enet_priv_tx_q {
struct fec_enet_priv_rx_q {
struct bufdesc_prop bd;
- struct sk_buff *rx_skbuff[RX_RING_SIZE];
+ struct fec_enet_priv_txrx_info rx_skb_info[RX_RING_SIZE];
+
+ /* page_pool */
+ struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
+ struct fec_enet_xdp_stats stats;
+
+ /* rx queue number, in the range 0-7 */
+ u8 id;
};
struct fec_stop_mode_gpr {
@@ -644,6 +672,9 @@ struct fec_enet_private {
struct imx_sc_ipc *ipc_handle;
+ /* XDP BPF Program */
+ struct bpf_prog *xdp_prog;
+
u64 ethtool_stats[];
};
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 59921218a8a4..643b4f6627ab 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -66,6 +66,8 @@
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <soc/imx/cpuidle.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
#include <asm/cacheflush.h>
@@ -422,6 +424,49 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
return 0;
}
+static int
+fec_enet_create_page_pool(struct fec_enet_private *fep,
+ struct fec_enet_priv_rx_q *rxq, int size)
+{
+ struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .pool_size = size,
+ .nid = dev_to_node(&fep->pdev->dev),
+ .dev = &fep->pdev->dev,
+ .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ .offset = FEC_ENET_XDP_HEADROOM,
+ .max_len = FEC_ENET_RX_FRSIZE,
+ };
+ int err;
+
+ rxq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rxq->page_pool)) {
+ err = PTR_ERR(rxq->page_pool);
+ rxq->page_pool = NULL;
+ return err;
+ }
+
+ err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0);
+ if (err < 0)
+ goto err_free_pp;
+
+ err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ rxq->page_pool);
+ if (err)
+ goto err_unregister_rxq;
+
+ return 0;
+
+err_unregister_rxq:
+ xdp_rxq_info_unreg(&rxq->xdp_rxq);
+err_free_pp:
+ page_pool_destroy(rxq->page_pool);
+ rxq->page_pool = NULL;
+ return err;
+}
+
static struct bufdesc *
fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
struct sk_buff *skb,
@@ -1450,7 +1495,7 @@ static void fec_enet_tx(struct net_device *ndev)
fec_enet_tx_queue(ndev, i);
}
-static int
+static int __maybe_unused
fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff *skb)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1470,8 +1515,9 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff
return 0;
}
-static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
- struct bufdesc *bdp, u32 length, bool swap)
+static bool __maybe_unused
+fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
+ struct bufdesc *bdp, u32 length, bool swap)
{
struct fec_enet_private *fep = netdev_priv(ndev);
struct sk_buff *new_skb;
@@ -1496,6 +1542,21 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
return true;
}
+static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
+ struct bufdesc *bdp, int index)
+{
+ struct page *new_page;
+ dma_addr_t phys_addr;
+
+ new_page = page_pool_dev_alloc_pages(rxq->page_pool);
+ WARN_ON(!new_page);
+ rxq->rx_skb_info[index].page = new_page;
+
+ rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
+ phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
+ bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
+}
+
/* During a receive, the bd_rx.cur points to the current incoming buffer.
* When we update through the ring, if the next incoming buffer has
* not been given to the system, we just set the empty indicator,
@@ -1508,7 +1569,6 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
struct fec_enet_priv_rx_q *rxq;
struct bufdesc *bdp;
unsigned short status;
- struct sk_buff *skb_new = NULL;
struct sk_buff *skb;
ushort pkt_len;
__u8 *data;
@@ -1517,8 +1577,8 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
bool vlan_packet_rcvd = false;
u16 vlan_tag;
int index = 0;
- bool is_copybreak;
bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
+ struct page *page;
#ifdef CONFIG_M532x
flush_cache_all();
@@ -1570,31 +1630,25 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
ndev->stats.rx_bytes += pkt_len;
index = fec_enet_get_bd_index(bdp, &rxq->bd);
- skb = rxq->rx_skbuff[index];
+ page = rxq->rx_skb_info[index].page;
+ dma_sync_single_for_cpu(&fep->pdev->dev,
+ fec32_to_cpu(bdp->cbd_bufaddr),
+ pkt_len,
+ DMA_FROM_DEVICE);
+ prefetch(page_address(page));
+ fec_enet_update_cbd(rxq, bdp, index);
/* The packet length includes FCS, but we don't want to
* include that when passing upstream as it messes up
* bridging applications.
*/
- is_copybreak = fec_enet_copybreak(ndev, &skb, bdp, pkt_len - 4,
- need_swap);
- if (!is_copybreak) {
- skb_new = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE);
- if (unlikely(!skb_new)) {
- ndev->stats.rx_dropped++;
- goto rx_processing_done;
- }
- dma_unmap_single(&fep->pdev->dev,
- fec32_to_cpu(bdp->cbd_bufaddr),
- FEC_ENET_RX_FRSIZE - fep->rx_align,
- DMA_FROM_DEVICE);
- }
-
- prefetch(skb->data - NET_IP_ALIGN);
+ skb = build_skb(page_address(page), PAGE_SIZE);
+ skb_reserve(skb, FEC_ENET_XDP_HEADROOM);
skb_put(skb, pkt_len - 4);
+ skb_mark_for_recycle(skb);
data = skb->data;
- if (!is_copybreak && need_swap)
+ if (need_swap)
swap_buffer(data, pkt_len);
#if !defined(CONFIG_M5272)
@@ -1649,16 +1703,6 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
skb_record_rx_queue(skb, queue_id);
napi_gro_receive(&fep->napi, skb);
- if (is_copybreak) {
- dma_sync_single_for_device(&fep->pdev->dev,
- fec32_to_cpu(bdp->cbd_bufaddr),
- FEC_ENET_RX_FRSIZE - fep->rx_align,
- DMA_FROM_DEVICE);
- } else {
- rxq->rx_skbuff[index] = skb_new;
- fec_enet_new_rxbdp(ndev, bdp, skb_new);
- }
-
rx_processing_done:
/* Clear the status flags for this buffer */
status &= ~BD_ENET_RX_STATS;
@@ -3002,26 +3046,19 @@ static void fec_enet_free_buffers(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
unsigned int i;
struct sk_buff *skb;
- struct bufdesc *bdp;
struct fec_enet_priv_tx_q *txq;
struct fec_enet_priv_rx_q *rxq;
unsigned int q;
for (q = 0; q < fep->num_rx_queues; q++) {
rxq = fep->rx_queue[q];
- bdp = rxq->bd.base;
- for (i = 0; i < rxq->bd.ring_size; i++) {
- skb = rxq->rx_skbuff[i];
- rxq->rx_skbuff[i] = NULL;
- if (skb) {
- dma_unmap_single(&fep->pdev->dev,
- fec32_to_cpu(bdp->cbd_bufaddr),
- FEC_ENET_RX_FRSIZE - fep->rx_align,
- DMA_FROM_DEVICE);
- dev_kfree_skb(skb);
- }
- bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
- }
+ for (i = 0; i < rxq->bd.ring_size; i++)
+ page_pool_release_page(rxq->page_pool, rxq->rx_skb_info[i].page);
+
+ if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
+ xdp_rxq_info_unreg(&rxq->xdp_rxq);
+ page_pool_destroy(rxq->page_pool);
+ rxq->page_pool = NULL;
}
for (q = 0; q < fep->num_tx_queues; q++) {
@@ -3111,24 +3148,32 @@ static int
fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
{
struct fec_enet_private *fep = netdev_priv(ndev);
- unsigned int i;
- struct sk_buff *skb;
+ int i, err;
struct bufdesc *bdp;
struct fec_enet_priv_rx_q *rxq;
+ dma_addr_t phys_addr;
+ struct page *page;
+
rxq = fep->rx_queue[queue];
bdp = rxq->bd.base;
+
+ err = fec_enet_create_page_pool(fep, rxq, rxq->bd.ring_size);
+ if (err < 0) {
+ netdev_err(ndev, "%s failed queue %d (%d)\n", __func__, queue, err);
+ return err;
+ }
+
for (i = 0; i < rxq->bd.ring_size; i++) {
- skb = __netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE, GFP_KERNEL);
- if (!skb)
+ page = page_pool_dev_alloc_pages(rxq->page_pool);
+ if (!page)
goto err_alloc;
- if (fec_enet_new_rxbdp(ndev, bdp, skb)) {
- dev_kfree_skb(skb);
- goto err_alloc;
- }
+ phys_addr = page_pool_get_dma_addr(page) + FEC_ENET_XDP_HEADROOM;
+ bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
- rxq->rx_skbuff[i] = skb;
+ rxq->rx_skb_info[i].page = page;
+ rxq->rx_skb_info[i].offset = FEC_ENET_XDP_HEADROOM;
bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);
if (fep->bufdesc_ex) {
--
2.34.1
Powered by blists - more mailing lists