Message-ID: <60f630cf-0057-4675-afcd-2b4e46430a44@gmail.com>
Date: Mon, 27 Oct 2025 11:47:51 +0000
From: Pavel Begunkov <asml.silence@...il.com>
To: David Wei <dw@...idwei.uk>, io-uring@...r.kernel.org,
 netdev@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>
Subject: Re: [PATCH v3 3/3] io_uring/zcrx: share an ifq between rings

On 10/27/25 10:20, Pavel Begunkov wrote:
> On 10/26/25 17:34, David Wei wrote:
>> Add a way to share an ifq from a src ring that is real, i.e. bound to a
>> HW RX queue, with other rings. This is done by passing a new flag
>> IORING_ZCRX_IFQ_REG_SHARE in the registration struct
>> io_uring_zcrx_ifq_reg, alongside the fd of the src ring and the id of
>> the ifq to be shared.
>>
>> To prevent the src ring or ifq from being cleaned up or freed while
>> there are still shared ifqs, take the appropriate refs on the src ring
>> (ctx->refs) and src ifq (ifq->refs).
>>
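
For reference, the userspace side of this ends up looking roughly like
the below (untested sketch; it relies on this patch reusing if_idx and
if_rxq to carry the src ring fd and the src ifq id):

        /* needs <linux/io_uring.h>, <sys/syscall.h>, <unistd.h> */
        struct io_uring_zcrx_ifq_reg reg = {
                .flags  = IORING_ZCRX_IFQ_REG_SHARE,
                .if_idx = src_ring_fd,  /* fd of the ring owning the real ifq */
                .if_rxq = src_zcrx_id,  /* id of the ifq to share */
        };

        ret = syscall(__NR_io_uring_register, new_ring_fd,
                      IORING_REGISTER_ZCRX_IFQ, &reg, 1);
        /* on success, reg.zcrx_id is the id to use on new_ring_fd */
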
>> Signed-off-by: David Wei <dw@...idwei.uk>
>> ---
>>   include/uapi/linux/io_uring.h |  4 ++
>>   io_uring/zcrx.c               | 74 ++++++++++++++++++++++++++++++++++-
>>   2 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 04797a9b76bc..4da4552a4215 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
>>       __u64    __resv2[2];
>>   };
>> +enum io_uring_zcrx_ifq_reg_flags {
>> +    IORING_ZCRX_IFQ_REG_SHARE    = 1,
>> +};
>> +
>>   /*
>>    * Argument for IORING_REGISTER_ZCRX_IFQ
>>    */
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 569cc0338acb..7418c959390a 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -22,10 +22,10 @@
>>   #include <uapi/linux/io_uring.h>
>>   #include "io_uring.h"
>> -#include "kbuf.h"
>>   #include "memmap.h"
>>   #include "zcrx.h"
>>   #include "rsrc.h"
>> +#include "register.h"
>>   #define IO_ZCRX_AREA_SUPPORTED_FLAGS    (IORING_ZCRX_AREA_DMABUF)
>> @@ -541,6 +541,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
>>       return ifq ? &ifq->region : NULL;
>>   }
>> +static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
>> +                 struct io_uring_zcrx_ifq_reg __user *arg,
>> +                 struct io_uring_zcrx_ifq_reg *reg)
>> +{
>> +    struct io_ring_ctx *src_ctx;
>> +    struct io_zcrx_ifq *src_ifq;
>> +    struct file *file;
>> +    int src_fd, ret;
>> +    u32 src_id, id;
>> +
>> +    src_fd = reg->if_idx;
>> +    src_id = reg->if_rxq;
>> +
>> +    file = io_uring_register_get_file(src_fd, false);
>> +    if (IS_ERR(file))
>> +        return PTR_ERR(file);
>> +
>> +    src_ctx = file->private_data;
>> +    if (src_ctx == ctx)
>> +        return -EBADFD;
>> +
>> +    mutex_unlock(&ctx->uring_lock);
>> +    io_lock_two_rings(ctx, src_ctx);
>> +
>> +    ret = -EINVAL;
>> +    src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
>> +    if (!src_ifq)
>> +        goto err_unlock;
>> +
>> +    percpu_ref_get(&src_ctx->refs);
>> +    refcount_inc(&src_ifq->refs);
>> +
>> +    scoped_guard(mutex, &ctx->mmap_lock) {
>> +        ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
>> +        if (ret)
>> +            goto err_unlock;
>> +
>> +        ret = -ENOMEM;
>> +        if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL)) {
>> +            xa_erase(&ctx->zcrx_ctxs, id);
>> +            goto err_unlock;
>> +        }
> 
> It's just xa_alloc(..., src_ifq, ...);
> 
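I.e. something like this (untested), dropping the separate xa_store():

        scoped_guard(mutex, &ctx->mmap_lock) {
                ret = xa_alloc(&ctx->zcrx_ctxs, &id, src_ifq,
                               xa_limit_31b, GFP_KERNEL);
                if (ret)
                        goto err_unlock;
        }
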
>> +    }
>> +
>> +    reg->zcrx_id = id;
>> +    if (copy_to_user(arg, reg, sizeof(*reg))) {
>> +        ret = -EFAULT;
>> +        goto err;
>> +    }
> 
> Better to do that before publishing zcrx into ctx->zcrx_ctxs
> 
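Rough, untested sketch of the ordering, keeping the reserve-then-store
pattern so the ifq only becomes visible once the id has made it back to
userspace:

        scoped_guard(mutex, &ctx->mmap_lock) {
                /* reserve the id; the ifq isn't published yet */
                ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL,
                               xa_limit_31b, GFP_KERNEL);
                if (ret)
                        goto err_unlock;
        }

        reg->zcrx_id = id;
        if (copy_to_user(arg, reg, sizeof(*reg))) {
                ret = -EFAULT;
                goto err;       /* only the reserved slot to erase */
        }

        scoped_guard(mutex, &ctx->mmap_lock)
                xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL);
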
>> +    mutex_unlock(&src_ctx->uring_lock);
>> +    fput(file);
>> +    return 0;
>> +err:
>> +    scoped_guard(mutex, &ctx->mmap_lock)
>> +        xa_erase(&ctx->zcrx_ctxs, id);
>> +err_unlock:
>> +    mutex_unlock(&src_ctx->uring_lock);
>> +    fput(file);
>> +    return ret;
>> +}
>> +
>>   int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>>                 struct io_uring_zcrx_ifq_reg __user *arg)
>>   {
>> @@ -566,6 +627,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>>           return -EINVAL;
>>       if (copy_from_user(&reg, arg, sizeof(reg)))
>>           return -EFAULT;
>> +    if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
>> +        return io_share_zcrx_ifq(ctx, arg, &reg);
>>       if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
>>           return -EFAULT;
>>       if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
>> @@ -663,7 +726,7 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>>               if (ifq)
>>                   xa_erase(&ctx->zcrx_ctxs, id);
>>           }
>> -        if (!ifq)
>> +        if (!ifq || ctx != ifq->ctx)
>>               break;
>>           io_zcrx_ifq_free(ifq);
>>       }
>> @@ -734,6 +797,13 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>>           if (xa_get_mark(&ctx->zcrx_ctxs, index, XA_MARK_0))
>>               continue;
>> +        /*
>> +         * Only shared ifqs want to put ctx->refs on the owning ifq
>> +         * ring. This matches the get in io_share_zcrx_ifq().
>> +         */
>> +        if (ctx != ifq->ctx)
>> +            percpu_ref_put(&ifq->ctx->refs);
> 
> After you put this ref and the ifq->refs below, the zcrx object can get
> destroyed, but this ctx might still have requests using the object.
> Waiting on ctx refs would ensure those requests are killed first, but
> that'd create a cycle.

Another concerning part is the long term cross-ctx referencing,
which is even worse than the page pool locking it up. I mentioned
that it'd be great to reverse the refcounting relation, but that'd
also need additional groundwork to break the dependencies.

> 
>> +
>>           /* Safe to clean up from any ring. */
>>           if (refcount_dec_and_test(&ifq->refs)) {
>>               io_zcrx_scrub(ifq);
> 

-- 
Pavel Begunkov

