[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com>
Date: Mon, 27 Oct 2025 11:47:51 +0000
From: Pavel Begunkov <asml.silence@...il.com>
To: David Wei <dw@...idwei.uk>, io-uring@...r.kernel.org,
netdev@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>
Subject: Re: [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings
On 10/27/25 10:20, Pavel Begunkov wrote:
> On 10/26/25 17:34, David Wei wrote:
>> Add a way to share an ifq from a src ring that is real i.e. bound to a
>> HW RX queue with other rings. This is done by passing a new flag
>> IORING_ZCRX_IFQ_REG_SHARE in the registration struct
>> io_uring_zcrx_ifq_reg, alongside the fd of the src ring and the ifq id
>> to be shared.
>>
>> To prevent the src ring or ifq from being cleaned up or freed while
>> there are still shared ifqs, take the appropriate refs on the src ring
>> (ctx->refs) and src ifq (ifq->refs).
>>
>> Signed-off-by: David Wei <dw@...idwei.uk>
>> ---
>> include/uapi/linux/io_uring.h | 4 ++
>> io_uring/zcrx.c | 74 ++++++++++++++++++++++++++++++++++-
>> 2 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 04797a9b76bc..4da4552a4215 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
>> __u64 __resv2[2];
>> };
>> +enum io_uring_zcrx_ifq_reg_flags {
>> + IORING_ZCRX_IFQ_REG_SHARE = 1,
>> +};
>> +
>> /*
>> * Argument for IORING_REGISTER_ZCRX_IFQ
>> */
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 569cc0338acb..7418c959390a 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -22,10 +22,10 @@
>> #include <uapi/linux/io_uring.h>
>> #include "io_uring.h"
>> -#include "kbuf.h"
>> #include "memmap.h"
>> #include "zcrx.h"
>> #include "rsrc.h"
>> +#include "register.h"
>> #define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
>> @@ -541,6 +541,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
>> return ifq ? &ifq->region : NULL;
>> }
>> +static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
>> + struct io_uring_zcrx_ifq_reg __user *arg,
>> + struct io_uring_zcrx_ifq_reg *reg)
>> +{
>> + struct io_ring_ctx *src_ctx;
>> + struct io_zcrx_ifq *src_ifq;
>> + struct file *file;
>> + int src_fd, ret;
>> + u32 src_id, id;
>> +
>> + src_fd = reg->if_idx;
>> + src_id = reg->if_rxq;
>> +
>> + file = io_uring_register_get_file(src_fd, false);
>> + if (IS_ERR(file))
>> + return PTR_ERR(file);
>> +
>> + src_ctx = file->private_data;
>> + if (src_ctx == ctx)
>> + return -EBADFD;
>> +
>> + mutex_unlock(&ctx->uring_lock);
>> + io_lock_two_rings(ctx, src_ctx);
>> +
>> + ret = -EINVAL;
>> + src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
>> + if (!src_ifq)
>> + goto err_unlock;
>> +
>> + percpu_ref_get(&src_ctx->refs);
>> + refcount_inc(&src_ifq->refs);
>> +
>> + scoped_guard(mutex, &ctx->mmap_lock) {
>> + ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
>> + if (ret)
>> + goto err_unlock;
>> +
>> + ret = -ENOMEM;
>> + if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL)) {
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> + goto err_unlock;
>> + }
>
> It's just xa_alloc(..., src_ifq, ...);
>
>> + }
>> +
>> + reg->zcrx_id = id;
>> + if (copy_to_user(arg, reg, sizeof(*reg))) {
>> + ret = -EFAULT;
>> + goto err;
>> + }
>
> Better to do that before publishing zcrx into ctx->zcrx_ctxs
>
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return 0;
>> +err:
>> + scoped_guard(mutex, &ctx->mmap_lock)
>> + xa_erase(&ctx->zcrx_ctxs, id);
>> +err_unlock:
>> + mutex_unlock(&src_ctx->uring_lock);
>> + fput(file);
>> + return ret;
>> +}
>> +
>> int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> struct io_uring_zcrx_ifq_reg __user *arg)
>> {
>> @@ -566,6 +627,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>> return -EINVAL;
>> if (copy_from_user(&reg, arg, sizeof(reg)))
>> return -EFAULT;
>> + if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
>> + return io_share_zcrx_ifq(ctx, arg, &reg);
>> if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
>> return -EFAULT;
>> if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
>> @@ -663,7 +726,7 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (ifq)
>> xa_erase(&ctx->zcrx_ctxs, id);
>> }
>> - if (!ifq)
>> + if (!ifq || ctx != ifq->ctx)
>> break;
>> io_zcrx_ifq_free(ifq);
>> }
>> @@ -734,6 +797,13 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>> if (xa_get_mark(&ctx->zcrx_ctxs, index, XA_MARK_0))
>> continue;
>> + /*
>> + * Only shared ifqs want to put ctx->refs on the owning ifq
>> + * ring. This matches the get in io_share_zcrx_ifq().
>> + */
>> + if (ctx != ifq->ctx)
>> + percpu_ref_put(&ifq->ctx->refs);
>
> After you put this and ifq->refs below down, the zcrx object can get
> destroyed, but this ctx might still have requests using the object.
> Waiting on ctx refs would ensure requests are killed, but that'd
> create a cycle.
Another concerning part is the long-term cross-ctx referencing,
which is even worse than the page pool (pp) locking it up. I mentioned
that it'd be great to reverse the refcounting relation,
but that'd also need additional ground work to break
dependencies.
>
>> +
>> /* Safe to clean up from any ring. */
>> if (refcount_dec_and_test(&ifq->refs)) {
>> io_zcrx_scrub(ifq);
>
--
Pavel Begunkov
Powered by blists - more mailing lists