Message-ID: <20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com>
Date: Thu, 1 May 2025 13:17:18 +0100
From: Pavel Begunkov <asml.silence@...il.com>
To: io-uring@...r.kernel.org
Cc: asml.silence@...il.com,
David Wei <dw@...idwei.uk>,
netdev@...r.kernel.org,
Jamal Hadi Salim <jhs@...atatu.com>,
Pedro Tammela <pctammela@...atatu.com>,
Victor Nogueira <victor@...atatu.com>
Subject: [PATCH io_uring 5/5] io_uring/zcrx: dmabuf backed zerocopy receive

Add support for dmabuf backed zcrx areas. To use one, set
IORING_ZCRX_AREA_DMABUF in the struct io_uring_zcrx_area_reg flags
field and pass the dmabuf fd in the dmabuf_fd field. For dmabuf areas,
the addr field is interpreted as a page aligned offset into the buffer
rather than a user address.
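
For example, describing such an area from userspace might look like
the following sketch, where dmabuf_fd and area_len are placeholders
and the rest of the IORING_REGISTER_ZCRX_IFQ setup is omitted:

	struct io_uring_zcrx_area_reg area = {
		/* page aligned offset into the dmabuf */
		.addr		= 0,
		.len		= area_len,
		.flags		= IORING_ZCRX_AREA_DMABUF,
		.dmabuf_fd	= dmabuf_fd,
	};

The descriptor is then hung off io_uring_zcrx_ifq_reg::area_ptr and
registered via io_uring_register(2), the same as for normal user
memory areas.
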
Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
include/uapi/linux/io_uring.h |   6 +-
io_uring/zcrx.c               | 157 ++++++++++++++++++++++++++++++----
io_uring/zcrx.h               |   7 ++
3 files changed, 153 insertions(+), 17 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 130f3bc71a69..5ce096090b0c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -990,12 +990,16 @@ struct io_uring_zcrx_offsets {
__u64 __resv[2];
};
+enum io_uring_zcrx_area_flags {
+ IORING_ZCRX_AREA_DMABUF = 1,
+};
+
struct io_uring_zcrx_area_reg {
__u64 addr;
__u64 len;
__u64 rq_area_token;
__u32 flags;
- __u32 __resv1;
+ __u32 dmabuf_fd;
__u64 __resv2[2];
};
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 34b09beba992..fac293bcba72 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -47,30 +47,112 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
return area->mem.pages[net_iov_idx(niov)];
}
-static void io_release_area_mem(struct io_zcrx_mem *mem)
+static void io_release_dmabuf(struct io_zcrx_mem *mem)
{
- if (mem->pages) {
- unpin_user_pages(mem->pages, mem->nr_folios);
- kvfree(mem->pages);
+ if (mem->sgt)
+ dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
+ DMA_FROM_DEVICE);
+ if (mem->attach)
+ dma_buf_detach(mem->dmabuf, mem->attach);
+ if (mem->dmabuf)
+ dma_buf_put(mem->dmabuf);
+
+ mem->sgt = NULL;
+ mem->attach = NULL;
+ mem->dmabuf = NULL;
+}
+
+static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_mem *mem,
+ struct io_uring_zcrx_area_reg *area_reg)
+{
+ unsigned long off = (unsigned long)area_reg->addr;
+ unsigned long len = (unsigned long)area_reg->len;
+ unsigned long total_size = 0;
+ struct scatterlist *sg;
+ int dmabuf_fd = area_reg->dmabuf_fd;
+ int i, ret;
+
+ if (WARN_ON_ONCE(!ifq->dev))
+ return -EFAULT;
+
+ mem->is_dmabuf = true;
+ mem->dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(mem->dmabuf)) {
+ ret = PTR_ERR(mem->dmabuf);
+ mem->dmabuf = NULL;
+ goto err;
+ }
+
+ mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
+ if (IS_ERR(mem->attach)) {
+ ret = PTR_ERR(mem->attach);
+ mem->attach = NULL;
+ goto err;
+ }
+
+ mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
+ if (IS_ERR(mem->sgt)) {
+ ret = PTR_ERR(mem->sgt);
+ mem->sgt = NULL;
+ goto err;
}
+
+ for_each_sgtable_dma_sg(mem->sgt, sg, i)
+ total_size += sg_dma_len(sg);
+
+	if (total_size < off + len) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+ mem->dmabuf_offset = off;
+ mem->size = len;
+ return 0;
+err:
+ io_release_dmabuf(mem);
+ return ret;
}
-static int io_import_area(struct io_zcrx_ifq *ifq,
+static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+ unsigned long off = area->mem.dmabuf_offset;
+ struct scatterlist *sg;
+ unsigned i, niov_idx = 0;
+
+ for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
+ dma_addr_t dma = sg_dma_address(sg);
+ unsigned long sg_len = sg_dma_len(sg);
+ unsigned long sg_off = min(sg_len, off);
+
+ off -= sg_off;
+ sg_len -= sg_off;
+ dma += sg_off;
+
+ while (sg_len && niov_idx < area->nia.num_niovs) {
+ struct net_iov *niov = &area->nia.niovs[niov_idx];
+
+ if (net_mp_niov_set_dma_addr(niov, dma))
+ return 0;
+ sg_len -= PAGE_SIZE;
+ dma += PAGE_SIZE;
+ niov_idx++;
+ }
+ }
+ return niov_idx;
+}
+
+static int io_import_umem(struct io_zcrx_ifq *ifq,
struct io_zcrx_mem *mem,
struct io_uring_zcrx_area_reg *area_reg)
{
struct page **pages;
int nr_pages;
- int ret;
- ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
- if (ret)
- return ret;
+ if (area_reg->dmabuf_fd)
+ return -EINVAL;
if (!area_reg->addr)
return -EFAULT;
- if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
- return -EINVAL;
-
pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
&nr_pages);
if (IS_ERR(pages))
@@ -82,6 +164,35 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
return 0;
}
+static void io_release_area_mem(struct io_zcrx_mem *mem)
+{
+ if (mem->is_dmabuf) {
+ io_release_dmabuf(mem);
+ return;
+ }
+ if (mem->pages) {
+ unpin_user_pages(mem->pages, mem->nr_folios);
+ kvfree(mem->pages);
+ }
+}
+
+static int io_import_area(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_mem *mem,
+ struct io_uring_zcrx_area_reg *area_reg)
+{
+ int ret;
+
+ ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
+ if (ret)
+ return ret;
+ if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
+ return -EINVAL;
+
+ if (area_reg->flags & IORING_ZCRX_AREA_DMABUF)
+ return io_import_dmabuf(ifq, mem, area_reg);
+ return io_import_umem(ifq, mem, area_reg);
+}
+
static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
struct io_zcrx_area *area, int nr_mapped)
{
@@ -101,7 +212,10 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
{
int i;
- io_zcrx_unmap_umem(ifq, area, nr_mapped);
+ if (area->mem.is_dmabuf)
+ io_release_dmabuf(&area->mem);
+ else
+ io_zcrx_unmap_umem(ifq, area, nr_mapped);
for (i = 0; i < area->nia.num_niovs; i++)
net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
@@ -145,7 +259,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
if (area->is_mapped)
return 0;
- nr = io_zcrx_map_area_umem(ifq, area);
+ if (area->mem.is_dmabuf)
+ nr = io_zcrx_map_area_dmabuf(ifq, area);
+ else
+ nr = io_zcrx_map_area_umem(ifq, area);
+
if (nr != area->nia.num_niovs) {
__io_zcrx_unmap_area(ifq, area, nr);
return -EINVAL;
@@ -251,6 +369,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
kfree(area);
}
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
+
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
struct io_zcrx_area **res,
struct io_uring_zcrx_area_reg *area_reg)
@@ -259,9 +379,11 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
unsigned nr_iovs;
int i, ret;
- if (area_reg->flags || area_reg->rq_area_token)
+ if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
+ return -EINVAL;
+ if (area_reg->rq_area_token)
return -EINVAL;
- if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
+ if (area_reg->__resv2[0] || area_reg->__resv2[1])
return -EINVAL;
ret = -ENOMEM;
@@ -819,6 +941,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
size_t copied = 0;
int ret = 0;
+ if (area->mem.is_dmabuf)
+ return -EFAULT;
+
while (len) {
size_t copy_size = min_t(size_t, PAGE_SIZE, len);
const int dst_off = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 9c22807af807..2f5e26389f22 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -3,15 +3,22 @@
#define IOU_ZC_RX_H
#include <linux/io_uring_types.h>
+#include <linux/dma-buf.h>
#include <linux/socket.h>
#include <net/page_pool/types.h>
#include <net/net_trackers.h>
struct io_zcrx_mem {
unsigned long size;
+ bool is_dmabuf;
struct page **pages;
unsigned long nr_folios;
+
+ struct dma_buf_attachment *attach;
+ struct dma_buf *dmabuf;
+ struct sg_table *sgt;
+ unsigned long dmabuf_offset;
};
struct io_zcrx_area {
--
2.48.1