lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Tue,  7 Nov 2023 13:40:42 -0800
From: David Wei <dw@...idwei.uk>
To: io-uring@...r.kernel.org,
	netdev@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>,
	Pavel Begunkov <asml.silence@...il.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jesper Dangaard Brouer <hawk@...nel.org>,
	David Ahern <dsahern@...nel.org>,
	Mina Almasry <almasrymina@...gle.com>,
	Willem de Bruijn <willemdebruijn.kernel@...il.com>,
	Dragos Tatulea <dtatulea@...dia.com>
Subject: [PATCH 17/20] io_uring/zcrx: copy fallback to ring buffers

From: Pavel Begunkov <asml.silence@...il.com>

The copy fallback is currently limited to the spinlock-protected
->freelist, but we also want to be able to grab buffers from the refill
queue, which is NAPI-protected. Use the new napi_execute() helper to
inject a function call into the NAPI context and refill from there.

TODO: the way drivers later set napi_id with io_zc_rx_set_napi() is not
reliable; we should catch all netif_napi_del() calls and update the id.

Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
Signed-off-by: David Wei <dw@...idwei.uk>
---
 include/linux/io_uring.h |  1 +
 io_uring/zc_rx.c         | 45 ++++++++++++++++++++++++++++++++++++++--
 io_uring/zc_rx.h         |  1 +
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index fb88e000c156..bf886d6de4e0 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -75,6 +75,7 @@ struct io_zc_rx_buf *io_zc_rx_get_buf(struct io_zc_rx_ifq *ifq);
 struct io_zc_rx_buf *io_zc_rx_buf_from_page(struct io_zc_rx_ifq *ifq,
 					    struct page *page);
 void io_zc_rx_put_buf(struct io_zc_rx_ifq *ifq, struct io_zc_rx_buf *buf);
+void io_zc_rx_set_napi(struct io_zc_rx_ifq *ifq, unsigned napi_id);
 
 static inline dma_addr_t io_zc_rx_buf_dma(struct io_zc_rx_buf *buf)
 {
diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
index c2ed600f0951..14328024a550 100644
--- a/io_uring/zc_rx.c
+++ b/io_uring/zc_rx.c
@@ -7,6 +7,7 @@
 #include <linux/netdevice.h>
 #include <linux/nospec.h>
 #include <net/tcp.h>
+#include <net/busy_poll.h>
 
 #include <uapi/linux/io_uring.h>
 
@@ -41,6 +42,11 @@ struct io_zc_rx_pool {
 	u32			freelist[];
 };
 
+struct io_zc_refill_data {	/* argument bundle passed to io_napi_refill() through its void *data */
+	struct io_zc_rx_ifq *ifq;	/* interface queue whose pool gets refilled */
+	unsigned count;	/* upper bound on ids moved from the pool cache to the freelist */
+};
+
 static inline u32 io_zc_rx_cqring_entries(struct io_zc_rx_ifq *ifq)
 {
 	struct io_rbuf_ring *ring = ifq->ring;
@@ -244,6 +250,12 @@ static void io_zc_rx_destroy_ifq(struct io_zc_rx_ifq *ifq)
 	kfree(ifq);
 }
 
+void io_zc_rx_set_napi(struct io_zc_rx_ifq *ifq, unsigned napi_id)
+{
+	ifq->napi_id = napi_id;	/* stored id is later handed to napi_execute() by the copy-fallback path */
+}
+EXPORT_SYMBOL(io_zc_rx_set_napi);
+
 static void io_zc_rx_destroy_pool_work(struct work_struct *work)
 {
 	struct io_zc_rx_pool *pool = container_of(
@@ -498,14 +510,43 @@ static void io_zc_rx_refill_cache(struct io_zc_rx_ifq *ifq, int count)
 	pool->cache_count += filled;
 }
 
+static bool io_napi_refill(void *data)	/* napi_execute() callback; data points to a struct io_zc_refill_data */
+{
+	struct io_zc_refill_data *rd = data;
+	struct io_zc_rx_ifq *ifq = rd->ifq;
+	struct io_zc_rx_pool *pool = ifq->pool;
+	int i, count = rd->count;
+
+	lockdep_assert_no_hardirq();
+
+	if (!pool->cache_count)	/* cache is empty: ask for a refill before moving anything */
+		io_zc_rx_refill_cache(ifq, POOL_REFILL_COUNT);
+
+	spin_lock_bh(&pool->freelist_lock);	/* same lock io_zc_get_buf_task_safe() takes around the freelist */
+	for (i = 0; i < count && pool->cache_count; i++) {	/* move at most count ids; stop early once the cache is drained */
+		u32 pgid;
+
+		pgid = pool->cache[--pool->cache_count];	/* pop from the top of the cache */
+		pool->freelist[pool->free_count++] = pgid;	/* push onto the freelist */
+	}
+	spin_unlock_bh(&pool->freelist_lock);
+	return true;	/* unconditional, even when no id was moved */
+}
+
 static struct io_zc_rx_buf *io_zc_get_buf_task_safe(struct io_zc_rx_ifq *ifq)
 {
 	struct io_zc_rx_pool *pool = ifq->pool;
 	struct io_zc_rx_buf *buf = NULL;
 	u32 pgid;
 
-	if (!READ_ONCE(pool->free_count))
-		return NULL;
+	if (!READ_ONCE(pool->free_count)) {
+		struct io_zc_refill_data rd = {
+			.ifq = ifq,
+			.count = 1,
+		};
+
+		napi_execute(ifq->napi_id, io_napi_refill, &rd);
+	}
 
 	spin_lock_bh(&pool->freelist_lock);
 	if (pool->free_count) {
diff --git a/io_uring/zc_rx.h b/io_uring/zc_rx.h
index fac32089e699..fd8828e4bd7a 100644
--- a/io_uring/zc_rx.h
+++ b/io_uring/zc_rx.h
@@ -20,6 +20,7 @@ struct io_zc_rx_ifq {
 	u32			cached_rq_head;
 	u32			cached_cq_tail;
 	void			*pool;
+	unsigned int		napi_id;
 
 	unsigned		nr_sockets;
 	struct file		*sockets[IO_ZC_MAX_IFQ_SOCKETS];
-- 
2.39.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ