[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250108220644.3528845-17-dw@davidwei.uk>
Date: Wed, 8 Jan 2025 14:06:37 -0800
From: David Wei <dw@...idwei.uk>
To: io-uring@...r.kernel.org,
netdev@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>,
Pavel Begunkov <asml.silence@...il.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jesper Dangaard Brouer <hawk@...nel.org>,
David Ahern <dsahern@...nel.org>,
Mina Almasry <almasrymina@...gle.com>,
Stanislav Fomichev <stfomichev@...il.com>,
Joe Damato <jdamato@...tly.com>,
Pedro Tammela <pctammela@...atatu.com>
Subject: [PATCH net-next v10 16/22] io_uring/zcrx: dma-map area for the device
From: Pavel Begunkov <asml.silence@...il.com>
Set up DMA mappings for the area into which we intend to receive data
later on. We know the device we want to attach to even before we get a
page pool, so we can pre-map in advance. All net_iovs are synchronised for
the device when allocated; see the io_zcrx_sync_for_device() calls made
right before net_mp_netmem_place_in_cache().
Reviewed-by: Jens Axboe <axboe@...nel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
Signed-off-by: David Wei <dw@...idwei.uk>
---
io_uring/zcrx.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++-
io_uring/zcrx.h | 1 +
2 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 64e86d14acc7..273bad4d86a2 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>
@@ -21,6 +22,82 @@
#include "zcrx.h"
#include "rsrc.h"
+/*
+ * Attributes used for all dma_map_page_attrs()/dma_unmap_page_attrs() calls
+ * on zcrx area pages below. DMA_ATTR_SKIP_CPU_SYNC is included; syncing for
+ * the device is issued separately, see io_zcrx_sync_for_device().
+ */
+#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/*
+ * Look up the device that the zcrx area is DMA-mapped for: the parent of
+ * the struct device embedded in the ifq's netdev. May be NULL; callers that
+ * care check for it themselves.
+ */
+static struct device *io_zcrx_get_device(struct io_zcrx_ifq *ifq)
+{
+	struct device *parent = ifq->dev->dev.parent;
+
+	return parent;
+}
+
+/*
+ * Tear down the DMA mappings of the first @nr_mapped net_iovs in @area.
+ * For each of them the stored DMA address is unmapped (PAGE_SIZE,
+ * DMA_FROM_DEVICE, IO_DMA_ATTR) and then reset to 0 in the niov.
+ *
+ * The device is looked up per iteration on purpose: with @nr_mapped == 0
+ * nothing is dereferenced at all.
+ */
+static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
+				 struct io_zcrx_area *area, int nr_mapped)
+{
+	int idx = 0;
+
+	while (idx < nr_mapped) {
+		netmem_ref netmem = net_iov_to_netmem(&area->nia.niovs[idx]);
+		dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem);
+
+		dma_unmap_page_attrs(io_zcrx_get_device(ifq), addr, PAGE_SIZE,
+				     DMA_FROM_DEVICE, IO_DMA_ATTR);
+		page_pool_set_dma_addr_netmem(netmem, 0);
+		idx++;
+	}
+}
+
+/*
+ * Undo io_zcrx_map_area(): if @area is currently marked as mapped, unmap
+ * all of its net_iovs and clear the flag.
+ *
+ * Clearing is_mapped makes the helper idempotent: a repeated call is a
+ * no-op instead of handing the same (already zeroed) DMA addresses to
+ * dma_unmap_page_attrs() a second time.
+ */
+static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+	if (!area->is_mapped)
+		return;
+
+	__io_zcrx_unmap_area(ifq, area, area->nia.num_niovs);
+	area->is_mapped = false;
+}
+
+/*
+ * DMA-map every page of @area for the device returned by
+ * io_zcrx_get_device() and store each address in the matching net_iov.
+ *
+ * Returns 0 and sets area->is_mapped on success. Returns -EINVAL when there
+ * is no device, or when mapping a page or storing its DMA address fails; in
+ * the failure case all mappings made so far are undone before returning.
+ */
+static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+	struct device *dev = io_zcrx_get_device(ifq);
+	int pos;
+
+	if (!dev)
+		return -EINVAL;
+
+	for (pos = 0; pos < area->nia.num_niovs; pos++) {
+		netmem_ref netmem = net_iov_to_netmem(&area->nia.niovs[pos]);
+		dma_addr_t addr;
+
+		addr = dma_map_page_attrs(dev, area->pages[pos], 0, PAGE_SIZE,
+					  DMA_FROM_DEVICE, IO_DMA_ATTR);
+		if (dma_mapping_error(dev, addr))
+			goto err_unmap;
+		if (page_pool_set_dma_addr_netmem(netmem, addr)) {
+			/*
+			 * The unwind below stops before index @pos, so this
+			 * mapping has to be released here.
+			 */
+			dma_unmap_page_attrs(dev, addr, PAGE_SIZE,
+					     DMA_FROM_DEVICE, IO_DMA_ATTR);
+			goto err_unmap;
+		}
+	}
+
+	area->is_mapped = true;
+	return 0;
+
+err_unmap:
+	/* Release the @pos entries that were fully set up. */
+	__io_zcrx_unmap_area(ifq, area, pos);
+	return -EINVAL;
+}
+
+/*
+ * Sync one net_iov's buffer for device access before it is handed to the
+ * page pool. Compiles to nothing unless both CONFIG_HAS_DMA and
+ * CONFIG_DMA_NEED_SYNC are set, and does nothing at runtime when
+ * dma_dev_need_sync() says the pool's device needs no syncing.
+ *
+ * The synced range is PAGE_SIZE bytes starting at the niov's DMA address
+ * plus the pool's configured offset, in the pool's DMA direction.
+ */
+static void io_zcrx_sync_for_device(const struct page_pool *pool,
+				    struct net_iov *niov)
+{
+#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
+	struct device *dev = pool->p.dev;
+
+	if (dma_dev_need_sync(dev)) {
+		dma_addr_t base;
+
+		base = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
+		__dma_sync_single_for_device(dev, base + pool->p.offset,
+					     PAGE_SIZE, pool->p.dma_dir);
+	}
+#endif
+}
+
#define IO_RQ_MAX_ENTRIES 32768
__maybe_unused
@@ -83,6 +160,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_area *area)
{
+ io_zcrx_unmap_area(area->ifq, area);
+
kvfree(area->freelist);
kvfree(area->nia.niovs);
kvfree(area->user_refs);
@@ -254,6 +333,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
if (!ifq->dev)
goto err;
+ ret = io_zcrx_map_area(ifq, ifq->area);
+ if (ret)
+ goto err;
+
reg.offsets.rqes = sizeof(struct io_uring);
reg.offsets.head = offsetof(struct io_uring, head);
reg.offsets.tail = offsetof(struct io_uring, tail);
@@ -404,6 +487,7 @@ static void io_zcrx_ring_refill(struct page_pool *pp,
continue;
}
+ io_zcrx_sync_for_device(pp, niov);
net_mp_netmem_place_in_cache(pp, netmem);
} while (--entries);
@@ -421,6 +505,7 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq)
netmem_ref netmem = net_iov_to_netmem(niov);
net_mp_niov_set_page_pool(pp, niov);
+ io_zcrx_sync_for_device(pp, niov);
net_mp_netmem_place_in_cache(pp, netmem);
}
spin_unlock_bh(&area->freelist_lock);
@@ -466,10 +551,14 @@ static int io_pp_zc_init(struct page_pool *pp)
return -EINVAL;
if (WARN_ON_ONCE(ifq->dev != pp->slow.netdev))
return -EINVAL;
- if (pp->dma_map)
+ if (WARN_ON_ONCE(io_zcrx_get_device(ifq) != pp->p.dev))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!pp->dma_map))
return -EOPNOTSUPP;
if (pp->p.order != 0)
return -EOPNOTSUPP;
+ if (pp->p.dma_dir != DMA_FROM_DEVICE)
+ return -EOPNOTSUPP;
percpu_ref_get(&ifq->ctx->refs);
return 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index f31c8006ca9c..beacf1ea6380 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -11,6 +11,7 @@ struct io_zcrx_area {
struct io_zcrx_ifq *ifq;
atomic_t *user_refs;
+ bool is_mapped;
u16 area_id;
struct page **pages;
--
2.43.5
Powered by blists - more mailing lists