Message-ID: <ZW-tX9EAnbw9a2lF@lore-desk>
Date: Wed, 6 Dec 2023 00:08:15 +0100
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: Jakub Kicinski <kuba@...nel.org>
Cc: aleksander.lobakin@...el.com, netdev@...r.kernel.org,
davem@...emloft.net, edumazet@...gle.com, pabeni@...hat.com,
lorenzo.bianconi@...hat.com, bpf@...r.kernel.org, hawk@...nel.org,
toke@...hat.com, willemdebruijn.kernel@...il.com,
jasowang@...hat.com, sdf@...gle.com
Subject: Re: [PATCH v3 net-next 2/2] xdp: add multi-buff support for xdp
running in generic mode
> On Mon, 4 Dec 2023 16:43:56 +0100 Lorenzo Bianconi wrote:
> > yes, I was thinking about it actually.
> > I ran some preliminary tests to check whether we are introducing any
> > performance penalties.
> > My setup relies on a couple of veth pairs and an eBPF program that performs
> > XDP_REDIRECT from one pair to the other. I am running the program in XDP
> > driver mode (not generic mode).
> >
> > v00 (NS:ns0 - 192.168.0.1/24) <---> (NS:ns1 - 192.168.0.2/24) v01 ==(XDP_REDIRECT)==> v10 (NS:ns1 - 192.168.1.1/24) <---> (NS:ns2 - 192.168.1.2/24) v11
> >
> > v00: iperf3 client
> > v11: iperf3 server
> >
> > I ran the test with different MTU values (1500B, 8KB, 64KB).
> >
> > net-next veth codebase:
> > =======================
> > - MTU 1500: iperf3 ~ 4.37Gbps
> > - MTU 8000: iperf3 ~ 9.75Gbps
> > - MTU 64000: iperf3 ~ 11.24Gbps
> >
> > net-next veth codebase + page_frag_cache instead of page_pool:
> > ==============================================================
> > - MTU 1500: iperf3 ~ 4.99Gbps (+14%)
> > - MTU 8000: iperf3 ~ 8.5Gbps (-12%)
> > - MTU 64000: iperf3 ~ 11.9Gbps (+6%)
> >
> > It seems there is no clear winner between page_pool and page_frag_cache.
> > What do you think?
>
> Hm, interesting. Are the iperf processes running on different cores?
> May be worth pinning (both same and different) to make sure the cache
> effects are isolated.

Hi Jakub,
I carried out some more tests today, following your suggestion, on both the
veth driver and the xdp_generic codebase (on a more powerful system).

Test setup (the redirect program is sketched right below the pinning details):
v00 (NS:ns0 - 192.168.0.1/24) <---> (NS:ns1 - 192.168.0.2/24) v01 ==(XDP_REDIRECT)==> v10 (NS:ns1 - 192.168.1.1/24) <---> (NS:ns2 - 192.168.1.2/24) v11
- v00: iperf3 client (pinned on core 0)
- v11: iperf3 server (pinned on core 7)
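
The redirect program itself is not included in this mail; a minimal sketch of
the XDP_REDIRECT step (the egress ifindex is just a placeholder here, filled
in from userspace before loading) looks roughly like this:

/* Minimal sketch, not the actual test program: redirect every frame
 * received on v01 to a fixed egress ifindex (v10 in the setup above).
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

volatile const __u32 egress_ifindex = 0; /* placeholder, set from userspace */

SEC("xdp")
int xdp_redirect_veth(struct xdp_md *ctx)
{
	return bpf_redirect(egress_ifindex, 0);
}

char LICENSE[] SEC("license") = "GPL";
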
net-next veth codebase (page_pool APIs):
=======================================
- MTU 1500: ~ 5.42 Gbps
- MTU 8000: ~ 14.1 Gbps
- MTU 64000: ~ 18.4 Gbps

net-next veth codebase + page_frag_cache APIs [0]:
==================================================
- MTU 1500: ~ 6.62 Gbps
- MTU 8000: ~ 14.7 Gbps
- MTU 64000: ~ 19.7 Gbps

xdp_generic codebase + page_frag_cache APIs (current proposed patch):
=====================================================================
- MTU 1500: ~ 6.41 Gbps
- MTU 8000: ~ 14.2 Gbps
- MTU 64000: ~ 19.8 Gbps

xdp_generic codebase + page_pool APIs [1]:
==========================================
- MTU 1500: ~ 5.75 Gbps
- MTU 8000: ~ 15.3 Gbps
- MTU 64000: ~ 21.2 Gbps

It seems the page_pool APIs work better for the xdp_generic codebase (except
in the MTU 1500 case), while the page_frag_cache APIs work better for the
veth driver. What do you think? Am I missing something?

Regards,
Lorenzo

[0] Here I have just used napi_alloc_frag() instead of
    page_pool_dev_alloc_va()/page_pool_dev_alloc() in
    veth_convert_skb_to_xdp_buff().
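
For clarity, a rough sketch of what [0] amounts to (illustrative only: the
helper name is made up, the surrounding veth logic is unchanged, and headroom
and sizing follow the hunks below):

/* Hypothetical helper, not the actual veth change: build the new head
 * from the per-CPU page_frag_cache via napi_alloc_frag() instead of
 * page_pool_dev_alloc_va(), with skb_free_frag() as the error-path
 * counterpart of page_pool_free_va(). Meant to run in NAPI context;
 * SKB_HEAD_ALIGN() as in the generic-XDP hunks below.
 */
#include <linux/bpf.h>		/* XDP_PACKET_HEADROOM */
#include <linux/skbuff.h>

static struct sk_buff *xdp_head_from_frag_cache(unsigned int size)
{
	unsigned int truesize = SKB_HEAD_ALIGN(size) + XDP_PACKET_HEADROOM;
	struct sk_buff *skb;
	void *data;

	data = napi_alloc_frag(truesize);
	if (!data)
		return NULL;

	skb = napi_build_skb(data, truesize);
	if (!skb) {
		skb_free_frag(data);
		return NULL;
	}

	skb_reserve(skb, XDP_PACKET_HEADROOM);
	return skb;
}

The main functional difference with respect to the page_pool path is that
there is no recycling (no skb_mark_for_recycle()); the frags are released
through the normal page reference counting.
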
[1] I developed this PoC to use page_pool APIs for xdp_generic code:
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..5115b61f38f1 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -21,6 +21,7 @@ struct netdev_rx_queue {
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
+ struct page_pool *page_pool;
} ____cacheline_aligned_in_smp;
/*
diff --git a/net/core/dev.c b/net/core/dev.c
index ed827b443d48..06fb568427c4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -4964,6 +4966,7 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
*/
if (skb_cloned(skb) || skb_shinfo(skb)->nr_frags ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ struct netdev_rx_queue *rxq = netif_get_rxqueue(skb);
u32 mac_len = skb->data - skb_mac_header(skb);
u32 size, truesize, len, max_head_size, off;
struct sk_buff *nskb;
@@ -4978,18 +4981,19 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
size = min_t(u32, skb->len, max_head_size);
truesize = SKB_HEAD_ALIGN(size) + XDP_PACKET_HEADROOM;
- data = napi_alloc_frag(truesize);
+ data = page_pool_dev_alloc_va(rxq->page_pool, &truesize);
if (!data)
return -ENOMEM;
nskb = napi_build_skb(data, truesize);
if (!nskb) {
- skb_free_frag(data);
+ page_pool_free_va(rxq->page_pool, data, true);
return -ENOMEM;
}
skb_reserve(nskb, XDP_PACKET_HEADROOM);
skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
err = skb_copy_bits(skb, 0, nskb->data, size);
if (err) {
@@ -5005,18 +5009,21 @@ static int netif_skb_check_for_generic_xdp(struct sk_buff **pskb,
len = skb->len - off;
for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
struct page *page;
+ u32 page_off;
size = min_t(u32, len, PAGE_SIZE);
-                       data = napi_alloc_frag(size);
-                       if (!data) {
+                       truesize = size;
+                       page = page_pool_dev_alloc(rxq->page_pool, &page_off,
+                                                  &truesize);
+                       if (!page) {
consume_skb(nskb);
return -ENOMEM;
}
- page = virt_to_head_page(data);
- skb_add_rx_frag(nskb, i, page,
- data - page_address(page), size, size);
- err = skb_copy_bits(skb, off, data, size);
+ skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+ err = skb_copy_bits(skb, off,
+ page_address(page) + page_off,
+ size);
if (err) {
consume_skb(nskb);
return err;
@@ -10057,6 +10064,11 @@ EXPORT_SYMBOL(netif_stacked_transfer_operstate);
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
+ struct page_pool_params page_pool_params = {
+ .pool_size = 256,
+ .nid = NUMA_NO_NODE,
+ .dev = &dev->dev,
+ };
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
int err = 0;
@@ -10075,14 +10087,27 @@ static int netif_alloc_rx_queues(struct net_device *dev)
/* XDP RX-queue setup */
err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
if (err < 0)
- goto err_rxq_info;
+ goto err_rxq;
+
+ /* rx queue page pool allocator */
+ rx[i].page_pool = page_pool_create(&page_pool_params);
+                       if (IS_ERR(rx[i].page_pool)) {
+                               rx[i].page_pool = NULL;
+                               /* rxq i is already registered; the rollback below starts at i - 1 */
+                               xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+                               goto err_rxq;
+ }
}
return 0;
-err_rxq_info:
+err_rxq:
/* Rollback successful reg's and free other resources */
- while (i--)
+ while (i--) {
xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ if (rx[i].page_pool)
+ page_pool_destroy(rx[i].page_pool);
+ }
+
kvfree(dev->_rx);
dev->_rx = NULL;
return err;
@@ -10096,8 +10119,11 @@ static void netif_free_rx_queues(struct net_device *dev)
if (!dev->_rx)
return;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+ if (dev->_rx[i].page_pool)
+ page_pool_destroy(dev->_rx[i].page_pool);
+ }
kvfree(dev->_rx);
}
--
2.43.0