Date:   Wed, 19 May 2021 15:13:18 +0100
From:   Pavel Begunkov <asml.silence@...il.com>
To:     io-uring@...r.kernel.org, netdev@...r.kernel.org,
        bpf@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     Jens Axboe <axboe@...nel.dk>, Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Martin KaFai Lau <kafai@...com>,
        Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
        John Fastabend <john.fastabend@...il.com>,
        KP Singh <kpsingh@...nel.org>,
        Horst Schirmeier <horst.schirmeier@...dortmund.de>,
        "Franz-B . Tuneke" <franz-bernhard.tuneke@...dortmund.de>,
        Christian Dietrich <stettberger@...ucode.de>
Subject: [PATCH 07/23] io_uring: extract struct for CQ

Extract a structure describing the internal completion queue state, called
struct io_cqring. We need it to support multi-CQ rings.

Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
 fs/io_uring.c | 47 +++++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 22 deletions(-)
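
Not part of the patch: a minimal standalone C sketch of the access pattern this
refactoring prepares for. The struct fields match the patch; the surrounding
names (ctx_sketch, nr_cqs, io_cq_space_left) are hypothetical and only
illustrate how per-CQ state could be indexed once a ring carries more than one
CQ.

/*
 * Standalone sketch, not kernel code: per-CQ state grouped into
 * struct io_cqring and indexed through an array, with cqs[0] keeping
 * the role of today's single CQ. nr_cqs and io_cq_space_left() are
 * illustrative assumptions, not part of this series.
 */
struct io_rings;			/* opaque here; defined in fs/io_uring.c */

struct io_cqring {
	unsigned		cached_tail;
	unsigned		entries;
	struct io_rings		*rings;
};

struct ctx_sketch {
	unsigned		nr_cqs;
	struct io_cqring	cqs[2];		/* cqs[0] stays the default CQ */
};

/* Free CQE slots in one CQ, mirroring the cqs[0] arithmetic in the patch. */
static inline unsigned io_cq_space_left(const struct io_cqring *cq, unsigned head)
{
	return cq->entries - (cq->cached_tail - head);
}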

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 49a1b6b81d7d..4fecd9da689e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -335,6 +335,12 @@ struct io_submit_state {
 	unsigned int		ios_left;
 };
 
+struct io_cqring {
+	unsigned		cached_tail;
+	unsigned		entries;
+	struct io_rings		*rings;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -402,17 +408,14 @@ struct io_ring_ctx {
 	struct xarray		personalities;
 	u32			pers_next;
 
-	struct {
-		unsigned		cached_cq_tail;
-		unsigned		cq_entries;
-		atomic_t		cq_timeouts;
-		unsigned		cq_last_tm_flush;
-		unsigned		cq_extra;
-		unsigned long		cq_check_overflow;
-		struct wait_queue_head	cq_wait;
-		struct fasync_struct	*cq_fasync;
-		struct eventfd_ctx	*cq_ev_fd;
-	} ____cacheline_aligned_in_smp;
+	struct fasync_struct	*cq_fasync;
+	struct eventfd_ctx	*cq_ev_fd;
+	atomic_t		cq_timeouts;
+	unsigned		cq_last_tm_flush;
+	unsigned long		cq_check_overflow;
+	unsigned		cq_extra;
+	struct wait_queue_head	cq_wait;
+	struct io_cqring	cqs[1];
 
 	struct {
 		spinlock_t		completion_lock;
@@ -1207,7 +1210,7 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
 	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
 		struct io_ring_ctx *ctx = req->ctx;
 
-		return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
+		return seq + READ_ONCE(ctx->cq_extra) != ctx->cqs[0].cached_tail;
 	}
 
 	return false;
@@ -1312,7 +1315,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 	if (list_empty(&ctx->timeout_list))
 		return;
 
-	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+	seq = ctx->cqs[0].cached_tail - atomic_read(&ctx->cq_timeouts);
 
 	do {
 		u32 events_needed, events_got;
@@ -1346,7 +1349,7 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 	io_flush_timeouts(ctx);
 
 	/* order cqe stores with ring update */
-	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+	smp_store_release(&ctx->rings->cq.tail, ctx->cqs[0].cached_tail);
 
 	if (unlikely(!list_empty(&ctx->defer_list)))
 		__io_queue_deferred(ctx);
@@ -1361,23 +1364,23 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 
 static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 {
-	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
+	return ctx->cqs[0].cached_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
 static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
-	unsigned tail, mask = ctx->cq_entries - 1;
+	unsigned tail, mask = ctx->cqs[0].entries - 1;
 
 	/*
 	 * writes to the cq entry need to come after reading head; the
 	 * control dependency is enough as we're using WRITE_ONCE to
 	 * fill the cq entry
 	 */
-	if (__io_cqring_events(ctx) == ctx->cq_entries)
+	if (__io_cqring_events(ctx) == ctx->cqs[0].entries)
 		return NULL;
 
-	tail = ctx->cached_cq_tail++;
+	tail = ctx->cqs[0].cached_tail++;
 	return &rings->cqes[tail & mask];
 }
 
@@ -1430,7 +1433,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	unsigned long flags;
 	bool all_flushed, posted;
 
-	if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
+	if (!force && __io_cqring_events(ctx) == ctx->cqs[0].entries)
 		return false;
 
 	posted = false;
@@ -5670,7 +5673,7 @@ static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
 		goto add;
 	}
 
-	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+	tail = ctx->cqs[0].cached_tail - atomic_read(&ctx->cq_timeouts);
 	req->timeout.target_seq = tail + off;
 
 	/* Update the last seq here in case io_flush_timeouts() hasn't.
@@ -9331,7 +9334,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		if (unlikely(ret))
 			goto out;
 
-		min_complete = min(min_complete, ctx->cq_entries);
+		min_complete = min(min_complete, ctx->cqs[0].entries);
 
 		/*
 		 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
@@ -9481,7 +9484,7 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 
 	/* make sure these are sane, as we already accounted them */
 	ctx->sq_entries = p->sq_entries;
-	ctx->cq_entries = p->cq_entries;
+	ctx->cqs[0].entries = p->cq_entries;
 
 	size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
 	if (size == SIZE_MAX)
-- 
2.31.1
