Message-ID: <bfb58429-6abe-06f0-3fd8-14a0040cecf0@kernel.dk>
Date: Fri, 25 Oct 2019 10:03:21 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Pavel Begunkov <asml.silence@...il.com>,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [BUG] io_uring: defer logic based on shared data

On 10/25/19 3:55 AM, Pavel Begunkov wrote:
> I found two problems with __io_sequence_defer().
>
> 1. It uses @sq_dropped but doesn't consider @cq_overflow.
> 2. @sq_dropped and @cq_overflow are write-shared with userspace, so
> they can be maliciously changed.
>
> See the liburing test I sent (test/defer, the *_hung() cases), which
> left an unkillable process for me.
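
Right, both counters live in the ring pages that are mapped writable into
userspace, so the kernel can't trust anything it reads back from them. For
reference, a minimal sketch of that kind of reproducer (hypothetical, not
Pavel's actual test; kdropped is liburing's pointer to the shared
sq_dropped field):

/*
 * Hypothetical sketch, not the actual test/defer case: corrupt the
 * shared sq_dropped counter so a drained request's sequence check can
 * never be satisfied on an unfixed kernel.
 */
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* a drained NOP is deferred until its sequence matches the
	 * kernel's (cq tail + sq_dropped) count */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	sqe->flags |= IOSQE_IO_DRAIN;

	/* sq_dropped sits in the shared SQ ring, so nothing stops
	 * userspace from rewriting it */
	*ring.sq.kdropped = ~0U;

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);	/* never completes */
	return 0;
}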
OK, how about the below. I'll split this in two, as it's really two
separate fixes.

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5d10984381cf..5d9d960c1c17 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -191,6 +191,7 @@ struct io_ring_ctx {
 		unsigned		sq_entries;
 		unsigned		sq_mask;
 		unsigned		sq_thread_idle;
+		unsigned		cached_sq_dropped;
 		struct io_uring_sqe	*sq_sqes;

 		struct list_head	defer_list;
@@ -208,6 +209,7 @@ struct io_ring_ctx {

 	struct {
 		unsigned		cached_cq_tail;
+		unsigned		cached_cq_overflow;
 		unsigned		cq_entries;
 		unsigned		cq_mask;
 		struct wait_queue_head	cq_wait;
@@ -419,7 +421,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 static inline bool __io_sequence_defer(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+	return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+			+ ctx->cached_cq_overflow;
 }

 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
@@ -590,9 +593,8 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 		WRITE_ONCE(cqe->res, res);
 		WRITE_ONCE(cqe->flags, 0);
 	} else {
-		unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
-		WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+		ctx->cached_cq_overflow++;
+		WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
 	}
 }

@@ -2601,7 +2603,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)

 	/* drop invalid entries */
 	ctx->cached_sq_head++;
-	rings->sq_dropped++;
+	ctx->cached_sq_dropped++;
+	WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
 	return false;
 }

@@ -2685,6 +2688,7 @@ static int io_sq_thread(void *data)

 	timeout = inflight = 0;
 	while (!kthread_should_park()) {
+		unsigned prev_cq, cur_cq;
 		bool mm_fault = false;
 		unsigned int to_submit;

@@ -2767,8 +2771,12 @@ static int io_sq_thread(void *data)
 		}

 		to_submit = min(to_submit, ctx->sq_entries);
+		prev_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
 		inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
 					   mm_fault);
+		cur_cq = ctx->cached_cq_tail + ctx->cached_cq_overflow;
+		if ((ctx->flags & IORING_SETUP_IOPOLL) && (prev_cq != cur_cq))
+			inflight -= cur_cq - prev_cq;

 		/* Commit SQ ring head once we've consumed all SQEs */
 		io_commit_sqring(ctx);
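
The common shape of both fixes: the kernel keeps its own authoritative
copy of each counter and treats the shared ring fields as write-only,
publishing updates for userspace but never basing decisions on values
read back from shared memory. The defer check also has to fold in
cq_overflow, since a completion that overflows the CQ ring consumes a
sequence number without advancing cq_tail. Reduced to a standalone
sketch (simplified stand-ins, not the kernel's actual structures):

/* simplified stand-in for the kernel's WRITE_ONCE() */
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

struct shared_rings {		/* mmap'd, writable by userspace */
	unsigned sq_dropped;
	unsigned cq_overflow;
};

struct ctx {			/* kernel-private, trusted */
	unsigned cached_sq_dropped;
	unsigned cached_cq_overflow;
	unsigned cached_cq_tail;
	struct shared_rings *rings;
};

static void note_cq_overflow(struct ctx *ctx)
{
	ctx->cached_cq_overflow++;
	/* publish for userspace; never read this field back */
	WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
}

static int sequence_ready(struct ctx *ctx, unsigned req_sequence)
{
	/* trusted counters only, and overflowed CQEs count too */
	return req_sequence == ctx->cached_cq_tail +
			       ctx->cached_sq_dropped +
			       ctx->cached_cq_overflow;
}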
--
Jens Axboe