Message-Id: <b1c7efdc33221fdb588995b385415d68b149aa73.1681987376.git.lorenzo@kernel.org>
Date: Thu, 20 Apr 2023 13:16:21 +0200
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: netdev@...r.kernel.org
Cc: bpf@...r.kernel.org, davem@...emloft.net, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com, ast@...nel.org,
daniel@...earbox.net, hawk@...nel.org, john.fastabend@...il.com,
toke@...hat.com, mtahhan@...hat.com, lorenzo.bianconi@...hat.com
Subject: [PATCH net-next 1/2] net: veth: add page_pool for page recycling
Introduce page_pool support in the veth driver in order to recycle pages
in the veth_convert_skb_to_xdp_buff routine and avoid reallocating the
skb through the page allocator.
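For reference, the allocation/recycle pattern this boils down to is
roughly the following (a minimal sketch, not part of the patch;
pp_build_skb is a hypothetical helper and error handling is limited to
the failure paths shown):

#include <linux/skbuff.h>
#include <net/page_pool.h>

/* Hypothetical helper illustrating the pattern: back the rebuilt skb
 * head with a page_pool page and mark the skb so the page returns to
 * the pool when the skb is freed, instead of going back to the page
 * allocator.
 */
static struct sk_buff *pp_build_skb(struct page_pool *pool)
{
	struct page *page = page_pool_dev_alloc_pages(pool);
	struct sk_buff *skb;

	if (!page)
		return NULL;

	skb = build_skb(page_address(page), PAGE_SIZE);
	if (!skb) {
		/* hand the page back to the pool on failure */
		page_pool_put_full_page(pool, page, false);
		return NULL;
	}

	/* recycle the page on skb free */
	skb_mark_for_recycle(skb);
	return skb;
}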
The patch has been tested sending tcp traffic to a veth pair where the
remote peer is running a simple xdp program just returning xdp_pass
(sketched below):
veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps
veth upstream codebase + pp support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps
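The xdp program used for the test is of this shape (a hypothetical
minimal reconstruction; the exact program is not part of this series):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_prog(struct xdp_md *ctx)
{
	/* let every frame continue up the stack untouched */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";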
Tested-by: Maryam Tahhan <mtahhan@...hat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
---
drivers/net/Kconfig | 1 +
drivers/net/veth.c | 54 ++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c34bd432da27..368c6f5b327e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
config VETH
tristate "Virtual ethernet pair device"
+ select PAGE_POOL
help
This device is a local ethernet tunnel. Devices are created in pairs.
When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e1b38fbf1dd9..141b7745ba43 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
+#include <net/page_pool.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
bool rx_notify_masked;
struct ptr_ring xdp_ring;
struct xdp_rxq_info xdp_rxq;
+ struct page_pool *page_pool;
};
struct veth_priv {
@@ -711,8 +713,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
skb_shinfo(skb)->nr_frags ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
u32 size, len, max_head_size, off;
+ struct page *page = NULL;
struct sk_buff *nskb;
- struct page *page;
int i, head_off;
/* We need a private copy of the skb and data buffers since
@@ -727,17 +729,21 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
goto drop;
/* Allocate skb head */
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (rq->page_pool)
+ page = page_pool_dev_alloc_pages(rq->page_pool);
if (!page)
goto drop;
nskb = build_skb(page_address(page), PAGE_SIZE);
if (!nskb) {
- put_page(page);
+ page_pool_put_full_page(rq->page_pool, page, false);
goto drop;
}
skb_reserve(nskb, VETH_XDP_HEADROOM);
+ skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
+
size = min_t(u32, skb->len, max_head_size);
if (skb_copy_bits(skb, 0, nskb->data, size)) {
consume_skb(nskb);
@@ -745,16 +751,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
}
skb_put(nskb, size);
- skb_copy_header(nskb, skb);
head_off = skb_headroom(nskb) - skb_headroom(skb);
skb_headers_offset_update(nskb, head_off);
/* Allocate paged area of new skb */
off = size;
len = skb->len - off;
+ page = NULL;
for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (rq->page_pool)
+ page = page_pool_dev_alloc_pages(rq->page_pool);
if (!page) {
consume_skb(nskb);
goto drop;
@@ -770,6 +777,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
len -= size;
off += size;
+ page = NULL;
}
consume_skb(skb);
@@ -1002,11 +1010,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
return done;
}
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .pool_size = VETH_RING_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = &rq->dev->dev,
+ };
+
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ int err = PTR_ERR(rq->page_pool);
+
+ rq->page_pool = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
+ for (i = start; i < end; i++) {
+ err = veth_create_page_pool(&priv->rq[i]);
+ if (err)
+ goto err_page_pool;
+ }
+
for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
@@ -1027,6 +1061,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
err_xdp_ring:
for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+ for (i = start; i < end; i++) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
return err;
}
@@ -1056,6 +1095,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
+
+ for (i = start; i < end; i++) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
}
static void veth_napi_del(struct net_device *dev)
--
2.40.0