Message-Id: <2c07d8e5cb5dfbd678d5a0bc6fb398aee82b67e4.1640029579.git.asml.silence@gmail.com>
Date: Tue, 21 Dec 2021 15:35:39 +0000
From: Pavel Begunkov <asml.silence@...il.com>
To: io-uring@...r.kernel.org, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: Jakub Kicinski <kuba@...nel.org>,
Jonathan Lemon <jonathan.lemon@...il.com>,
"David S . Miller" <davem@...emloft.net>,
Willem de Bruijn <willemb@...gle.com>,
Eric Dumazet <edumazet@...gle.com>,
David Ahern <dsahern@...nel.org>, Jens Axboe <axboe@...nel.dk>,
Pavel Begunkov <asml.silence@...il.com>
Subject: [RFC v2 17/19] io_uring: unclog ctx refs waiting with zc notifiers
Currently every instance of struct io_tx_notifier holds a ctx reference,
including the ones sitting in caches. So, when we try to quiesce the ring
(e.g. for register) we'd be waiting for refs that nobody can release.
That's currently worked around for cancellation.

Don't take ctx references; instead, wait for all notifiers to return into
their caches when needed. An even better solution would be to wait for all
rsrc refs. It's also nice that this removes an extra pair of
percpu_ref_get/put().
Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
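For illustration only, below is a minimal userspace sketch of the pattern
this patch adopts: instead of pinning the context with a reference per
notifier, keep a plain counter under the lock, and have the teardown path
repeatedly drain the cache and sleep until the counter drops to zero. All
names here (toy_ctx, tx_note, etc.) are hypothetical stand-ins, not the
actual io_uring code; the real code uses ctx->uring_lock and
schedule_timeout() rather than a pthread mutex and nanosleep().

/* Hypothetical userspace analogue of the wait-for-cached-notifiers scheme. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct tx_note {
	struct tx_note *next;		/* cache freelist linkage */
};

struct toy_ctx {
	pthread_mutex_t lock;		/* stands in for ctx->uring_lock */
	struct tx_note *cache;		/* notifiers returned for reuse */
	int nr_tx_ctx;			/* live notifiers, cached or in flight */
};

static struct tx_note *note_alloc(struct toy_ctx *ctx)
{
	struct tx_note *n;

	pthread_mutex_lock(&ctx->lock);
	n = ctx->cache;
	if (n) {
		ctx->cache = n->next;
	} else {
		n = calloc(1, sizeof(*n));
		if (n)
			ctx->nr_tx_ctx++;	/* counted instead of taking a ctx ref */
	}
	pthread_mutex_unlock(&ctx->lock);
	return n;
}

/* Completion path: return the notifier to the cache, don't free it. */
static void note_complete(struct toy_ctx *ctx, struct tx_note *n)
{
	pthread_mutex_lock(&ctx->lock);
	n->next = ctx->cache;
	ctx->cache = n;
	pthread_mutex_unlock(&ctx->lock);
}

/* Teardown: drain the cache and wait until every notifier has come back. */
static void ctx_exit_wait(struct toy_ctx *ctx)
{
	struct timespec interval = { .tv_nsec = 20 * 1000 * 1000 };

	for (;;) {
		struct tx_note *n;
		int nr;

		pthread_mutex_lock(&ctx->lock);
		while ((n = ctx->cache)) {	/* io_notifier_free_cached() analogue */
			ctx->cache = n->next;
			free(n);
			ctx->nr_tx_ctx--;
		}
		nr = ctx->nr_tx_ctx;
		pthread_mutex_unlock(&ctx->lock);

		if (!nr)
			break;			/* all notifiers returned and freed */
		nanosleep(&interval, NULL);
	}
}

int main(void)
{
	struct toy_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct tx_note *n = note_alloc(&ctx);

	note_complete(&ctx, n);	/* "skb destructed", notifier back in cache */
	ctx_exit_wait(&ctx);	/* returns once nr_tx_ctx hits zero */
	printf("all notifiers reclaimed\n");
	return 0;
}
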
fs/io_uring.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f79178a3f38..8cfa8ea161e4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -453,6 +453,7 @@ struct io_ring_ctx {
struct io_mapped_ubuf *dummy_ubuf;
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;
+ int nr_tx_ctx;
struct delayed_work rsrc_put_work;
struct llist_head rsrc_put_llist;
@@ -1982,7 +1983,6 @@ static void io_zc_tx_work_callback(struct work_struct *work)
io_cqring_ev_posted(ctx);
percpu_ref_put(rsrc_refs);
- percpu_ref_put(&ctx->refs);
}
static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
@@ -2028,6 +2028,7 @@ static void io_notifier_free_cached(struct io_ring_ctx *ctx)
struct io_tx_notifier, cache_node);
list_del(&notifier->cache_node);
kfree(notifier);
+ ctx->nr_tx_ctx--;
}
}
@@ -2060,6 +2061,7 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
notifier = kmalloc(sizeof(*notifier), gfp_flags);
if (!notifier)
return NULL;
+ ctx->nr_tx_ctx++;
uarg = &notifier->uarg;
uarg->ctx = ctx;
uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
@@ -2072,7 +2074,6 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
io_set_rsrc_node(&notifier->fixed_rsrc_refs, ctx);
refcount_set(&notifier->uarg.refcnt, 1);
- percpu_ref_get(&ctx->refs);
return notifier;
}
@@ -9785,7 +9786,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
#endif
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
- io_notifier_free_cached(ctx);
io_sqe_tx_ctx_unregister(ctx);
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
@@ -9946,6 +9946,19 @@ static __cold void io_ring_exit_work(struct work_struct *work)
spin_lock(&ctx->completion_lock);
spin_unlock(&ctx->completion_lock);
+ while (1) {
+ int nr;
+
+ mutex_lock(&ctx->uring_lock);
+ io_notifier_free_cached(ctx);
+ nr = ctx->nr_tx_ctx;
+ mutex_unlock(&ctx->uring_lock);
+
+ if (!nr)
+ break;
+ schedule_timeout(interval);
+ }
+
io_ring_ctx_free(ctx);
}
--
2.34.1