Message-Id: <20230801061932.10335-2-liangchen.linux@gmail.com>
Date: Tue, 1 Aug 2023 14:19:32 +0800
From: Liang Chen <liangchen.linux@...il.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
linyunsheng@...wei.com
Cc: hawk@...nel.org,
ilias.apalodimas@...aro.org,
daniel@...earbox.net,
ast@...nel.org,
netdev@...r.kernel.org,
liangchen.linux@...il.com
Subject: [RFC PATCH net-next v2 2/2] net: veth: Improve page pool pages recycling

Page pool is supported for veth, but for the XDP_TX and XDP_REDIRECT cases
the pages are not effectively recycled. The "ethtool -S" page pool
statistics show this:
NIC statistics:
  rx_pp_alloc_fast: 18041186
  rx_pp_alloc_slow: 286369
  rx_pp_recycle_ring: 0
  rx_pp_recycle_released_ref: 18327555
This failure to recycle page pool pages is a result of the code snippet
below, which converts page pool pages into regular pages and releases
the skb data structure:

  veth_xdp_get(xdp);
  consume_skb(skb);
The reason is that some skbs received from the veth peer are not page
pool pages, and they remain so after conversion to an xdp frame. To avoid
confusing __xdp_return() with a mix of regular pages and page pool pages,
they are all converted to regular pages, so registering the xdp memory
model as MEM_TYPE_PAGE_SHARED is sufficient.
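
For reference, this is roughly how __xdp_return() in net/core/xdp.c
picks the release path from the registered memory type (a simplified
paraphrase, not the exact upstream code):

  /* Simplified sketch of __xdp_return(): the release path is chosen by
   * the memory type registered for the rxq, so a MEM_TYPE_PAGE_POOL
   * registration would wrongly push regular pages into the recycling
   * path. */
  switch (mem->type) {
  case MEM_TYPE_PAGE_POOL:
          /* Hand the page back to its originating page pool. */
          page = virt_to_head_page(data);
          page_pool_put_full_page(page->pp, page, napi_direct);
          break;
  case MEM_TYPE_PAGE_SHARED:
          /* Plain refcount-based release, no recycling. */
          page_frag_free(data);
          break;
  default:
          /* MEM_TYPE_PAGE_ORDER0, MEM_TYPE_XSK_BUFF_POOL, ... */
          break;
  }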
If we instead release the skb with kfree_skb_partial(), which frees only
the sk_buff struct itself, we can retain the original page pool page
behavior. However, simply changing the xdp memory model to
MEM_TYPE_PAGE_POOL is not a solution, as explained above. Therefore, an
additional MEM_TYPE_PAGE_POOL memory model is registered for each rq.
In addition, to avoid mixing pages from page pools with different
xdp_mem_id values, page pool pages coming directly from the peer are
still converted into regular pages. This is uncommon, as most of the
time they will be reallocated in veth_convert_skb_to_xdp_buff().
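
(For context, a condensed sketch of that reallocation path, following
the existing veth_convert_skb_to_xdp_buff() logic with details elided:)

  /* Condensed sketch: when the incoming skb is unsuitable for XDP
   * (shared, locked head, fragmented, too little headroom, ...), its
   * contents are copied into pages freshly allocated from this rq's
   * own page pool, so the resulting skb's pages belong to a pool we
   * know about. */
  if (skb_shared(skb) || skb_head_is_locked(skb) || ...) {
          page = page_pool_dev_alloc_pages(rq->page_pool);
          if (!page)
                  goto drop;

          nskb = build_skb(page_address(page), PAGE_SIZE);
          if (!nskb) {
                  page_pool_put_full_page(rq->page_pool, page, true);
                  goto drop;
          }

          skb_reserve(nskb, VETH_XDP_HEADROOM);
          skb_copy_header(nskb, skb);
          skb_mark_for_recycle(nskb);
          /* ... copy the data, then free the old skb ... */
          consume_skb(skb);
          skb = nskb;
  }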
The following tests were conducted using pktgen to generate traffic and
evaluate the performance improvement once page pool pages are
successfully recycled in scenarios involving XDP_TX, XDP_REDIRECT, and
AF_XDP.
Test environment setup:

  ns1            ns2
  veth0 <-peer-> veth1
  veth2 <-peer-> veth3
Test Results:

pktgen -> veth1 -> veth0(XDP_TX) -> veth1(XDP_DROP)
  without PP recycle: 1,780,392
  with PP recycle:    1,984,680
  improvement:        ~10%

pktgen -> veth1 -> veth0(XDP_TX) -> veth1(XDP_PASS)
  without PP recycle: 1,433,491
  with PP recycle:    1,511,680
  improvement:        ~5-6%

pktgen -> veth1 -> veth0(XDP_REDIRECT) -> veth2 -> veth3(XDP_DROP)
  without PP recycle: 1,527,708
  with PP recycle:    1,672,101
  improvement:        ~10%

pktgen -> veth1 -> veth0(XDP_REDIRECT) -> veth2 -> veth3(XDP_PASS)
  without PP recycle: 1,325,804
  with PP recycle:    1,392,704
  improvement:        ~5.5%

pktgen -> veth1 -> veth0(AF_XDP) -> user space(DROP)
  without PP recycle: 1,607,609
  with PP recycle:    1,736,957
  improvement:        ~8%
Additionally, the performance improvement was measured for the case where
converting to an xdp_buff does not require a buffer copy and the original
skb uses regular pages, i.e. page pool recycling is not involved. This
still gives around a 2% improvement, attributable to the change from
consume_skb() to kfree_skb_partial().
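
For reference, kfree_skb_partial() with head_stolen == true frees only
the sk_buff struct and leaves the data buffer untouched, which is why it
is both cheaper than consume_skb() here and safe while the xdp_buff
still references the pages. Roughly, from net/core/skbuff.c:

  /* Rough paraphrase of kfree_skb_partial(): with head_stolen == true
   * only the sk_buff struct itself goes back to the slab cache; the
   * data buffer (and the page references the xdp_buff still holds) is
   * left alone. consume_skb() would also release the data. */
  void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
  {
          if (head_stolen) {
                  skb_release_head_state(skb);
                  kmem_cache_free(skbuff_cache, skb);
          } else {
                  __kfree_skb(skb);
          }
  }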
Signed-off-by: Liang Chen <liangchen.linux@...il.com>
---
Changes from v1:
- pp pages from the peers are still converted into regular pages.
---
drivers/net/veth.c | 48 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 42 insertions(+), 6 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 509e901da41d..ea1b344e5db4 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -62,6 +62,7 @@ struct veth_rq {
struct net_device *dev;
struct bpf_prog __rcu *xdp_prog;
struct xdp_mem_info xdp_mem;
+ struct xdp_mem_info xdp_mem_pp;
struct veth_rq_stats stats;
bool rx_notify_masked;
struct ptr_ring xdp_ring;
@@ -836,6 +837,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
struct bpf_prog *xdp_prog;
struct veth_xdp_buff vxbuf;
struct xdp_buff *xdp = &vxbuf.xdp;
+ struct sk_buff *skb_orig;
u32 act, metalen;
int off;
@@ -848,6 +850,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
goto out;
}
+ skb_orig = skb;
__skb_push(skb, skb->data - skb_mac_header(skb));
if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
goto drop;
@@ -862,9 +865,18 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
case XDP_PASS:
break;
case XDP_TX:
- veth_xdp_get(xdp);
- consume_skb(skb);
- xdp->rxq->mem = rq->xdp_mem;
+ if (skb != skb_orig) {
+ xdp->rxq->mem = rq->xdp_mem_pp;
+ kfree_skb_partial(skb, true);
+ } else if (!skb->pp_recycle) {
+ xdp->rxq->mem = rq->xdp_mem;
+ kfree_skb_partial(skb, true);
+ } else {
+ veth_xdp_get(xdp);
+ consume_skb(skb);
+ xdp->rxq->mem = rq->xdp_mem;
+ }
+
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
stats->rx_drops++;
@@ -874,9 +886,18 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
- veth_xdp_get(xdp);
- consume_skb(skb);
- xdp->rxq->mem = rq->xdp_mem;
+ if (skb != skb_orig) {
+ xdp->rxq->mem = rq->xdp_mem_pp;
+ kfree_skb_partial(skb, true);
+ } else if (!skb->pp_recycle) {
+ xdp->rxq->mem = rq->xdp_mem;
+ kfree_skb_partial(skb, true);
+ } else {
+ veth_xdp_get(xdp);
+ consume_skb(skb);
+ xdp->rxq->mem = rq->xdp_mem;
+ }
+
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
stats->rx_drops++;
goto err_xdp;
@@ -1061,6 +1082,14 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
goto err_page_pool;
}
+ for (i = start; i < end; i++) {
+ err = xdp_reg_mem_model(&priv->rq[i].xdp_mem_pp,
+ MEM_TYPE_PAGE_POOL,
+ priv->rq[i].page_pool);
+ if (err)
+ goto err_reg_mem;
+ }
+
for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
@@ -1082,6 +1111,10 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
i = end;
+err_reg_mem:
+ for (i--; i >= start; i--)
+ xdp_unreg_mem_model(&priv->rq[i].xdp_mem_pp);
+ i = end;
err_page_pool:
for (i--; i >= start; i--) {
page_pool_destroy(priv->rq[i].page_pool);
@@ -1117,6 +1150,9 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
+ for (i = start; i < end; i++)
+ xdp_unreg_mem_model(&priv->rq[i].xdp_mem_pp);
+
for (i = start; i < end; i++) {
page_pool_destroy(priv->rq[i].page_pool);
priv->rq[i].page_pool = NULL;
--
2.40.1