[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240312214430.2923019-13-dw@davidwei.uk>
Date: Tue, 12 Mar 2024 14:44:26 -0700
From: David Wei <dw@...idwei.uk>
To: io-uring@...r.kernel.org,
netdev@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>,
Pavel Begunkov <asml.silence@...il.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jesper Dangaard Brouer <hawk@...nel.org>,
David Ahern <dsahern@...nel.org>,
Mina Almasry <almasrymina@...gle.com>
Subject: [RFC PATCH v4 12/16] io_uring/zcrx: implement PP_FLAG_DMA_* handling
From: Pavel Begunkov <asml.silence@...il.com>
The patch implements support for PP_FLAG_DMA_MAP and
PP_FLAG_DMA_SYNC_DEV. DMA-map buffers when creating a page pool if
needed, and unmap them on tear down. Most of the syncing is done by the
page pool apart from when we're grabbing buffers from the refill ring,
in which case we need to do it by hand.
Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
Signed-off-by: David Wei <dw@...idwei.uk>
---
io_uring/zc_rx.c | 90 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 86 insertions(+), 4 deletions(-)
diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
index b2507df121fb..4bd27eda4bc9 100644
--- a/io_uring/zc_rx.c
+++ b/io_uring/zc_rx.c
@@ -9,6 +9,7 @@
#include <net/tcp.h>
#include <net/af_unix.h>
#include <trace/events/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <uapi/linux/io_uring.h>
@@ -72,6 +73,7 @@ static int io_zc_rx_init_buf(struct page *page, struct io_zc_rx_buf *buf)
{
memset(&buf->niov, 0, sizeof(buf->niov));
atomic_long_set(&buf->niov.pp_ref_count, 0);
+ page_pool_set_dma_addr_netmem(net_iov_to_netmem(&buf->niov), 0);
buf->page = page;
get_page(page);
@@ -392,12 +394,25 @@ static inline netmem_ref io_zc_buf_to_netmem(struct io_zc_rx_buf *buf)
return net_iov_to_netmem(&buf->niov);
}
+static inline void io_zc_sync_for_device(struct page_pool *pp,
+ netmem_ref netmem)
+{
+ if (pp->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+ dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
+
+ dma_sync_single_range_for_device(pp->p.dev, dma_addr,
+ pp->p.offset, pp->p.max_len,
+ pp->p.dma_dir);
+ }
+}
+
static inline void io_zc_add_pp_cache(struct page_pool *pp,
struct io_zc_rx_buf *buf)
{
netmem_ref netmem = io_zc_buf_to_netmem(buf);
page_pool_set_pp_info(pp, netmem);
+ io_zc_sync_for_device(pp, netmem);
pp->alloc.cache[pp->alloc.count++] = netmem;
}
@@ -517,9 +532,71 @@ static void io_pp_zc_scrub(struct page_pool *pp)
}
}
+#define IO_PP_DMA_ATTRS (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+static void io_pp_unmap_buf(struct io_zc_rx_buf *buf, struct page_pool *pp)
+{
+ netmem_ref netmem = net_iov_to_netmem(&buf->niov);
+ dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
+
+ dma_unmap_page_attrs(pp->p.dev, dma, PAGE_SIZE << pp->p.order,
+ pp->p.dma_dir, IO_PP_DMA_ATTRS);
+ page_pool_set_dma_addr_netmem(netmem, 0);
+}
+
+static int io_pp_map_buf(struct io_zc_rx_buf *buf, struct page_pool *pp)
+{
+ netmem_ref netmem = net_iov_to_netmem(&buf->niov);
+ dma_addr_t dma_addr;
+ int ret;
+
+ dma_addr = dma_map_page_attrs(pp->p.dev, buf->page, 0,
+ PAGE_SIZE << pp->p.order, pp->p.dma_dir,
+ IO_PP_DMA_ATTRS);
+ ret = dma_mapping_error(pp->p.dev, dma_addr);
+ if (ret)
+ return ret;
+
+ if (WARN_ON_ONCE(page_pool_set_dma_addr_netmem(netmem, dma_addr))) {
+ dma_unmap_page_attrs(pp->p.dev, dma_addr,
+ PAGE_SIZE << pp->p.order, pp->p.dma_dir,
+ IO_PP_DMA_ATTRS);
+ return -EFAULT;
+ }
+
+ io_zc_sync_for_device(pp, netmem);
+ return 0;
+}
+
+static int io_pp_map_pool(struct io_zc_rx_pool *pool, struct page_pool *pp)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < pool->nr_bufs; i++) {
+ ret = io_pp_map_buf(&pool->bufs[i], pp);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ while (i--)
+ io_pp_unmap_buf(&pool->bufs[i], pp);
+ }
+ return ret;
+}
+
+static void io_pp_unmap_pool(struct io_zc_rx_pool *pool, struct page_pool *pp)
+{
+ int i;
+
+ for (i = 0; i < pool->nr_bufs; i++)
+ io_pp_unmap_buf(&pool->bufs[i], pp);
+}
+
static int io_pp_zc_init(struct page_pool *pp)
{
struct io_zc_rx_ifq *ifq = pp->mp_priv;
+ int ret;
if (!ifq)
return -EINVAL;
@@ -527,10 +604,12 @@ static int io_pp_zc_init(struct page_pool *pp)
return -EINVAL;
if (!pp->p.napi)
return -EINVAL;
- if (pp->p.flags & PP_FLAG_DMA_MAP)
- return -EOPNOTSUPP;
- if (pp->p.flags & PP_FLAG_DMA_SYNC_DEV)
- return -EOPNOTSUPP;
+
+ if (pp->p.flags & PP_FLAG_DMA_MAP) {
+ ret = io_pp_map_pool(ifq->pool, pp);
+ if (ret)
+ return ret;
+ }
percpu_ref_get(&ifq->ctx->refs);
ifq->pp = pp;
@@ -542,6 +621,9 @@ static void io_pp_zc_destroy(struct page_pool *pp)
struct io_zc_rx_ifq *ifq = pp->mp_priv;
struct io_zc_rx_pool *pool = ifq->pool;
+ if (pp->p.flags & PP_FLAG_DMA_MAP)
+ io_pp_unmap_pool(ifq->pool, pp);
+
ifq->pp = NULL;
if (WARN_ON_ONCE(pool->free_count != pool->nr_bufs))
--
2.43.0
Powered by blists - more mailing lists