lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2439336d-b6ae-4d08-a1e8-2372fc6df383@kernel.dk>
Date: Wed, 15 Jan 2025 10:18:29 -0700
From: Jens Axboe <axboe@...nel.dk>
To: Jann Horn <jannh@...gle.com>, Pavel Begunkov <asml.silence@...il.com>
Cc: io-uring@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] io_uring/rsrc: Simplify buffer cloning by locking both
 rings

On 1/15/25 9:25 AM, Jann Horn wrote:
> The locking in the buffer cloning code is somewhat complex because it goes
> back and forth between locking the source ring and the destination ring.
> 
> Make it easier to reason about by locking both rings at the same time.
> To avoid ABBA deadlocks, lock the rings in ascending kernel address order,
> just like in lock_two_nondirectories().
> 
> Signed-off-by: Jann Horn <jannh@...gle.com>
> ---
> Just an idea for how I think io_clone_buffers() could be changed so it
> becomes slightly easier to reason about.
> I left the out_unlock jump label with its current name for now, though
> I guess that should probably be adjusted.

Looks pretty clean to me, and does make it easier to reason about. Only
thing that stuck out to me was:

> @@ -1067,7 +1060,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
>  	file = io_uring_register_get_file(buf.src_fd, registered_src);
>  	if (IS_ERR(file))
>  		return PTR_ERR(file);
> -	ret = io_clone_buffers(ctx, file->private_data, &buf);
> +	src_ctx = file->private_data;
> +	if (src_ctx == ctx) {
> +		ret = -ELOOP;
> +		goto out_put;
> +	}

which is a change, as previously it would've been legal to do something ala:

struct io_uring ring;
struct iovec vecs[2];

vecs[0] = real_buffer;
vecs[1] = sparse_buffer;

io_uring_register_buffers(&ring, vecs, 2);

io_uring_clone_buffers_offset(&ring, &ring, 1, 0, 1, IORING_REGISTER_DST_REPLACE);

and clone vecs[0] into slot 1. With the patch, that'll return -ELOOP instead.

Maybe something like the below incremental, to just make the unlock +
double lock depending on whether they are different or not? And also
cleaning up the label naming at the same time.


diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 4b030382ad03..a1c7c8db5545 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -938,6 +938,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	int i, ret, off, nr;
 	unsigned int nbufs;
 
+	lockdep_assert_held(&ctx->uring_lock);
+	lockdep_assert_held(&src_ctx->uring_lock);
+
 	/*
 	 * Accounting state is shared between the two rings; that only works if
 	 * both rings are accounted towards the same counters.
@@ -979,17 +982,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	ret = -ENXIO;
 	nbufs = src_ctx->buf_table.nr;
 	if (!nbufs)
-		goto out_unlock;
+		goto out_free;
 	ret = -EINVAL;
 	if (!arg->nr)
 		arg->nr = nbufs;
 	else if (arg->nr > nbufs)
-		goto out_unlock;
+		goto out_free;
 	ret = -EOVERFLOW;
 	if (check_add_overflow(arg->nr, arg->src_off, &off))
-		goto out_unlock;
+		goto out_free;
 	if (off > nbufs)
-		goto out_unlock;
+		goto out_free;
 
 	off = arg->dst_off;
 	i = arg->src_off;
@@ -1004,7 +1007,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 			dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 			if (!dst_node) {
 				ret = -ENOMEM;
-				goto out_unlock;
+				goto out_free;
 			}
 
 			refcount_inc(&src_node->buf->refs);
@@ -1027,11 +1030,11 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	 * copied to a ring that does not have buffers yet (checked at function
 	 * entry).
 	 */
-	WARN_ON(ctx->buf_table.nr);
+	WARN_ON_ONCE(ctx->buf_table.nr);
 	ctx->buf_table = data;
 	return 0;
 
-out_unlock:
+out_free:
 	io_rsrc_data_free(ctx, &data);
 	return ret;
 }
@@ -1064,18 +1067,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
 	file = io_uring_register_get_file(buf.src_fd, registered_src);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
+
 	src_ctx = file->private_data;
-	if (src_ctx == ctx) {
-		ret = -ELOOP;
-		goto out_put;
+	if (src_ctx != ctx) {
+		mutex_unlock(&ctx->uring_lock);
+		lock_two_rings(ctx, src_ctx);
 	}
 
-	mutex_unlock(&ctx->uring_lock);
-	lock_two_rings(ctx, src_ctx);
 	ret = io_clone_buffers(ctx, src_ctx, &buf);
-	mutex_unlock(&src_ctx->uring_lock);
 
-out_put:
+	if (src_ctx != ctx)
+		mutex_unlock(&src_ctx->uring_lock);
+
 	if (!registered_src)
 		fput(file);
 	return ret;

-- 
Jens Axboe

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ