lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <3cbedc531b633af4fe8632f7276aa843b5a54875.1664123680.git.josh@joshtriplett.org>
Date:   Sun, 25 Sep 2022 17:44:39 +0100
From:   Josh Triplett <josh@...htriplett.org>
To:     Jens Axboe <axboe@...nel.dk>,
        Pavel Begunkov <asml.silence@...il.com>,
        io-uring@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [PATCH] io_uring: Support calling io_uring_register with a
 registered ring fd

Add a new flag IORING_REGISTER_USE_REGISTERED_RING (set via the high bit
of the opcode) that makes io_uring_register treat the fd argument as an
index into the ring fds registered via IORING_REGISTER_RING_FDS, rather
than as a file descriptor.

This makes it possible for a library to open an io_uring, register the
ring fd, close the ring fd, and subsequently use the ring entirely via
registered index.

Signed-off-by: Josh Triplett <josh@...htriplett.org>
---

This is enough for many libraries to use io_uring transparently without
disrupting any callers. Libraries with even more stringent requirements
(e.g. never even transiently having a file descriptor open) will need
two additional pieces:
- Adding a flag to io_uring_setup to set up the ring directly as a
  registered file descriptor, without ever putting it in the file
  descriptor table.
- Supporting the initial mmap via a registered file descriptor, for
  example through an io_uring_register call.

 include/uapi/linux/io_uring.h |  6 +++++-
 io_uring/io_uring.c           | 30 +++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6b83177fd41d..103b4babc175 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -427,6 +427,7 @@ struct io_uring_params {
 #define IORING_FEAT_RSRC_TAGS		(1U << 10)
 #define IORING_FEAT_CQE_SKIP		(1U << 11)
 #define IORING_FEAT_LINKED_FILE		(1U << 12)
+#define IORING_FEAT_REG_REG_RING	(1U << 13)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -474,7 +475,10 @@ enum {
 	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
 
 	/* this goes last */
-	IORING_REGISTER_LAST
+	IORING_REGISTER_LAST,
+
+	/* flag added to the opcode to use a registered ring fd */
+	IORING_REGISTER_USE_REGISTERED_RING	= 1U << 31
 };
 
 /* io-wq worker categories */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2965b354efc8..efe5170d3e77 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3350,7 +3350,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
 			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
 			IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
-			IORING_FEAT_LINKED_FILE;
+			IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
@@ -3857,13 +3857,29 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 	long ret = -EBADF;
 	struct fd f;
 
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
+	/*
+	 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
+	 * need only dereference our task private array to find it.
+	 */
+	if (opcode & IORING_REGISTER_USE_REGISTERED_RING) {
+		struct io_uring_task *tctx = current->io_uring;
 
-	ret = -EOPNOTSUPP;
-	if (!io_is_uring_fops(f.file))
-		goto out_fput;
+		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
+			return -EINVAL;
+		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
+		f.file = tctx->registered_rings[fd];
+		f.flags = 0;
+		if (unlikely(!f.file))
+			return -EBADF;
+		opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;
+	} else {
+		f = fdget(fd);
+		if (unlikely(!f.file))
+			return -EBADF;
+		ret = -EOPNOTSUPP;
+		if (!io_is_uring_fops(f.file))
+			goto out_fput;
+	}
 
 	ctx = f.file->private_data;
 
-- 
2.37.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ