Message-Id: <2c07d8e5cb5dfbd678d5a0bc6fb398aee82b67e4.1640029579.git.asml.silence@gmail.com>
Date:   Tue, 21 Dec 2021 15:35:39 +0000
From:   Pavel Begunkov <asml.silence@...il.com>
To:     io-uring@...r.kernel.org, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org
Cc:     Jakub Kicinski <kuba@...nel.org>,
        Jonathan Lemon <jonathan.lemon@...il.com>,
        "David S . Miller" <davem@...emloft.net>,
        Willem de Bruijn <willemb@...gle.com>,
        Eric Dumazet <edumazet@...gle.com>,
        David Ahern <dsahern@...nel.org>, Jens Axboe <axboe@...nel.dk>,
        Pavel Begunkov <asml.silence@...il.com>
Subject: [RFC v2 17/19] io_uring: unclog ctx refs waiting with zc notifiers

Currently, every instance of struct io_tx_notifier holds a ctx reference,
including ones sitting in caches. So, when we try to quiesce the ring
(e.g. for register), we'd be waiting for refs that nobody can release.
That is worked around for cancellation.

Don't take ctx references; instead, wait for all notifiers to return to
their caches when needed. An even better solution would be to wait for
all rsrc refs. This also removes an extra pair of percpu_ref_get/put().
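For illustration only, here is a minimal userspace sketch of the
counter-and-wait scheme (the names, the pthread locking and usleep()
are stand-ins I picked, not io_uring code): objects bump a per-ctx
counter on first allocation instead of taking a ctx reference, recycle
through a cache without touching the counter, and the exit path drains
the cache and polls until the count hits zero.

#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

/* A cached object; stands in for struct io_tx_notifier. */
struct obj {
	struct obj *next;
};

/* Stands in for struct io_ring_ctx. */
struct ctx {
	pthread_mutex_t lock;	/* stands in for uring_lock */
	int nr_objs;		/* every allocated object, cached or in flight */
	struct obj *cache;	/* free list of cached objects */
};

/* Allocation bumps the counter instead of taking a ctx reference. */
static struct obj *obj_alloc(struct ctx *ctx)
{
	struct obj *o;

	pthread_mutex_lock(&ctx->lock);
	o = ctx->cache;
	if (o) {
		ctx->cache = o->next;
	} else {
		o = malloc(sizeof(*o));
		if (o)
			ctx->nr_objs++;
	}
	pthread_mutex_unlock(&ctx->lock);
	return o;
}

/* Completion returns the object to the cache; the counter is untouched. */
static void obj_recycle(struct ctx *ctx, struct obj *o)
{
	pthread_mutex_lock(&ctx->lock);
	o->next = ctx->cache;
	ctx->cache = o;
	pthread_mutex_unlock(&ctx->lock);
}

/* Caller holds ctx->lock; mirrors io_notifier_free_cached(). */
static void free_cached(struct ctx *ctx)
{
	while (ctx->cache) {
		struct obj *o = ctx->cache;

		ctx->cache = o->next;
		free(o);
		ctx->nr_objs--;
	}
}

/* Exit path: drain the cache, then poll until in-flight objects return. */
static void ctx_exit_wait(struct ctx *ctx)
{
	for (;;) {
		int nr;

		pthread_mutex_lock(&ctx->lock);
		free_cached(ctx);
		nr = ctx->nr_objs;
		pthread_mutex_unlock(&ctx->lock);

		if (!nr)
			break;
		usleep(1000);	/* stands in for schedule_timeout() */
	}
}

The patch below does the same under uring_lock, with schedule_timeout()
as the back-off in io_ring_exit_work().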

Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
 fs/io_uring.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f79178a3f38..8cfa8ea161e4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -453,6 +453,7 @@ struct io_ring_ctx {
 		struct io_mapped_ubuf		*dummy_ubuf;
 		struct io_rsrc_data		*file_data;
 		struct io_rsrc_data		*buf_data;
+		int				nr_tx_ctx;
 
 		struct delayed_work		rsrc_put_work;
 		struct llist_head		rsrc_put_llist;
@@ -1982,7 +1983,6 @@ static void io_zc_tx_work_callback(struct work_struct *work)
 	io_cqring_ev_posted(ctx);
 
 	percpu_ref_put(rsrc_refs);
-	percpu_ref_put(&ctx->refs);
 }
 
 static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
@@ -2028,6 +2028,7 @@ static void io_notifier_free_cached(struct io_ring_ctx *ctx)
 					    struct io_tx_notifier, cache_node);
 		list_del(&notifier->cache_node);
 		kfree(notifier);
+		ctx->nr_tx_ctx--;
 	}
 }
 
@@ -2060,6 +2061,7 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
 		notifier = kmalloc(sizeof(*notifier), gfp_flags);
 		if (!notifier)
 			return NULL;
+		ctx->nr_tx_ctx++;
 		uarg = &notifier->uarg;
 		uarg->ctx = ctx;
 		uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
@@ -2072,7 +2074,6 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
 	io_set_rsrc_node(&notifier->fixed_rsrc_refs, ctx);
 
 	refcount_set(&notifier->uarg.refcnt, 1);
-	percpu_ref_get(&ctx->refs);
 	return notifier;
 }
 
@@ -9785,7 +9786,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 #endif
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
-	io_notifier_free_cached(ctx);
 	io_sqe_tx_ctx_unregister(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
@@ -9946,6 +9946,19 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 	spin_lock(&ctx->completion_lock);
 	spin_unlock(&ctx->completion_lock);
 
+	while (1) {
+		int nr;
+
+		mutex_lock(&ctx->uring_lock);
+		io_notifier_free_cached(ctx);
+		nr = ctx->nr_tx_ctx;
+		mutex_unlock(&ctx->uring_lock);
+
+		if (!nr)
+			break;
+		schedule_timeout(interval);
+	}
+
 	io_ring_ctx_free(ctx);
 }
 
-- 
2.34.1
