Message-ID: <20241112104406.2rltxkliwhksn3hw@green245>
Date: Tue, 12 Nov 2024 16:14:06 +0530
From: Anuj Gupta <anuj20.g@...sung.com>
To: hexue <xue01.he@...sung.com>
Cc: axboe@...nel.dk, asml.silence@...il.com, io-uring@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH liburing] test: add test cases for hybrid iopoll

On 11/11/24 08:36PM, hexue wrote:
>Add a test file for hybrid iopoll to make sure it works safely. Test cases
>include basic read/write tests, and run in normal iopoll mode and
>passthrough mode respectively.
>
>Signed-off-by: hexue <xue01.he@...sung.com>
>---
> man/io_uring_setup.2            |  10 +-
> src/include/liburing/io_uring.h |   3 +
> test/Makefile                   |   1 +
> test/iopoll-hybridpoll.c        | 544 ++++++++++++++++++++++++++++++++
> 4 files changed, 557 insertions(+), 1 deletion(-)
> create mode 100644 test/iopoll-hybridpoll.c
>
>diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>index 2f87783..8cfafdc 100644
>--- a/man/io_uring_setup.2
>+++ b/man/io_uring_setup.2
>@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
> parameter set to the desired number of polling queues. The polling queues
> will be shared appropriately between the CPUs in the system, if the number
> is less than the number of online CPU threads.
>-
>+.TP
>+.B IORING_SETUP_HYBRID_IOPOLL
>+This flag must setup with
s/<must setup with>/<must be used with>

>+.B IORING_SETUP_IOPOLL
>+flag. hybrid poll is a new
s/hybrid/Hybrid
"new" is probably not required here

>+feature baed on iopoll, this could be a suboptimal solution when running
s/baed/based
s/<this could>/<which might>

>+on a single thread, it offers higher performance than IRQ and lower CPU
s/<, it>/<. It>

>+utilization than polling. Similarly, this feature also requires the devices
>+to support polling configuration.
This feature would work even if a device doesn't have polled queues, right?
The performance might be suboptimal in that case, but userspace won't
see any errors.
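
For context, here is how I'd expect userspace to probe for the flag (a
minimal, untested sketch; it assumes the kernel rejects unknown setup
flags with -EINVAL, so callers on older kernels can fall back to plain
IOPOLL):

	/*
	 * Try hybrid iopoll first; fall back to plain IOPOLL if the
	 * kernel doesn't know the flag (assumed -EINVAL rejection).
	 */
	static int setup_polled_ring(struct io_uring *ring, unsigned entries)
	{
		int ret;

		ret = io_uring_queue_init(entries, ring,
					  IORING_SETUP_IOPOLL |
					  IORING_SETUP_HYBRID_IOPOLL);
		if (ret == -EINVAL)
			ret = io_uring_queue_init(entries, ring,
						  IORING_SETUP_IOPOLL);
		return ret;
	}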

> .TP
> .B IORING_SETUP_SQPOLL
> When this flag is specified, a kernel thread is created to perform
>diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
>index 20bc570..d16364c 100644
>--- a/src/include/liburing/io_uring.h
>+++ b/src/include/liburing/io_uring.h
>@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
>  */
> #define IORING_SETUP_NO_SQARRAY		(1U << 16)
>
>+/* Use hybrid poll in iopoll process */
>+#define IORING_SETUP_HYBRID_IOPOLL      (1U << 17)
>+
> enum io_uring_op {
> 	IORING_OP_NOP,
> 	IORING_OP_READV,
>diff --git a/test/Makefile b/test/Makefile
>index dfbbcbe..ea9452c 100644
>--- a/test/Makefile
>+++ b/test/Makefile
>@@ -116,6 +116,7 @@ test_srcs := \
> 	iopoll.c \
> 	iopoll-leak.c \
> 	iopoll-overflow.c \
>+	iopoll-hybridpoll.c \
> 	io_uring_enter.c \
> 	io_uring_passthrough.c \
> 	io_uring_register.c \
>diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
>new file mode 100644
>index 0000000..d7c08ae
>--- /dev/null
>+++ b/test/iopoll-hybridpoll.c
>@@ -0,0 +1,544 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Description: basic read/write tests with
>+ * hybrid polled IO, include iopoll and io_uring
>+ * passthrough.
>+ */
>+#include <errno.h>
>+#include <stdio.h>
>+#include <unistd.h>
>+#include <stdlib.h>
>+#include <string.h>
>+#include <fcntl.h>
>+#include <sys/types.h>
>+#include <poll.h>
>+#include <sys/eventfd.h>
>+#include <sys/resource.h>
>+#include "helpers.h"
>+#include "liburing.h"
>+#include "../src/syscall.h"
>+#include "nvme.h"
>+
>+#define FILE_SIZE	(128 * 1024)
>+#define BS		4096
>+#define BUFFERS		(FILE_SIZE / BS)
>+
>+static struct iovec *vecs;
>+static int no_pt, no_iopoll;
>+
>+static int fill_pattern(int tc)
>+{
>+	unsigned int val, *ptr;
>+	int i, j;
>+	int u_in_buf = BS / sizeof(val);
>+
>+	val = (tc / 2) * FILE_SIZE;
>+	for (i = 0; i < BUFFERS; i++) {
>+		ptr = vecs[i].iov_base;
>+		for (j = 0; j < u_in_buf; j++) {
>+			*ptr = val;
>+			val++;
>+			ptr++;
>+		}
>+	}
>+
>+	return 0;
>+}
>+
>+static int verify_buf(int tc, void *buf, off_t off)
>+{
>+	int i, u_in_buf = BS / sizeof(unsigned int);
>+	unsigned int *ptr;
>+
>+	off /= sizeof(unsigned int);
>+	off += (tc / 2) * FILE_SIZE;
>+	ptr = buf;
>+	for (i = 0; i < u_in_buf; i++) {
>+		if (off != *ptr) {
>+			fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
>+					(unsigned long long) off);
>+			return 1;
>+		}
>+		ptr++;
>+		off++;
>+	}
>+
>+	return 0;
>+}
>+
>+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
>+		     int sqthread, int fixed, int nonvec)
>+{
>+	struct io_uring_sqe *sqe;
>+	struct io_uring_cqe *cqe;
>+	struct nvme_uring_cmd *cmd;
>+	int open_flags;
>+	int do_fixed;
>+	int i, ret, fd = -1;
>+	off_t offset;
>+	__u64 slba;
>+	__u32 nlb;
>+
>+	if (write)
>+		open_flags = O_WRONLY;
>+	else
>+		open_flags = O_RDONLY;
>+
>+	if (fixed) {
>+		ret = t_register_buffers(ring, vecs, BUFFERS);
>+		if (ret == T_SETUP_SKIP)
>+			return 0;
>+		if (ret != T_SETUP_OK) {
>+			fprintf(stderr, "buffer reg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+
>+	fd = open(file, open_flags);
>+	if (fd < 0) {
>+		if (errno == EACCES || errno == EPERM)
>+			return T_EXIT_SKIP;
>+		perror("file open");
>+		goto err;
>+	}
>+
>+	if (sqthread) {
>+		ret = io_uring_register_files(ring, &fd, 1);
>+		if (ret) {
>+			fprintf(stderr, "file reg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+
>+	if (write)
>+		fill_pattern(tc);
>+
>+	offset = 0;
>+	for (i = 0; i < BUFFERS; i++) {
>+		sqe = io_uring_get_sqe(ring);
>+		if (!sqe) {
>+			fprintf(stderr, "sqe get failed\n");
>+			goto err;
>+		}
>+		if (write) {
>+			int use_fd = fd;
>+
>+			do_fixed = fixed;
>+
>+			if (sqthread)
>+				use_fd = 0;
>+			if (fixed && (i & 1))
>+				do_fixed = 0;
>+			if (do_fixed) {
>+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+								vecs[i].iov_len,
>+								offset, i);
>+				sqe->cmd_op = NVME_URING_CMD_IO;
>+			} else if (nonvec) {
>+				io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
>+							vecs[i].iov_len, offset);
>+				sqe->cmd_op = NVME_URING_CMD_IO;
>+			} else {
>+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+								offset);
>+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+			}	
>+		} else {
>+			int use_fd = fd;
>+
>+			do_fixed = fixed;
>+
>+			if (sqthread)
>+				use_fd = 0;
>+			if (fixed && (i & 1))
>+				do_fixed = 0;
>+			if (do_fixed) {
>+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+								vecs[i].iov_len,
>+								offset, i);
>+				sqe->cmd_op = NVME_URING_CMD_IO;
>+			} else if (nonvec) {
>+				io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
>+							vecs[i].iov_len, offset);
>+				sqe->cmd_op = NVME_URING_CMD_IO;
>+			} else {
>+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+								offset);
>+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+			}
>+		}
>+		sqe->opcode = IORING_OP_URING_CMD;
>+		if (do_fixed)
>+			sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
>+		sqe->user_data = ((uint64_t)offset << 32) | i;
>+		if (sqthread)
>+			sqe->flags |= IOSQE_FIXED_FILE;
>+
>+		cmd = (struct nvme_uring_cmd *)sqe->cmd;
>+		memset(cmd, 0, sizeof(struct nvme_uring_cmd));
>+
>+		cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
>+
>+		slba = offset >> lba_shift;
>+		nlb = (BS >> lba_shift) - 1;
>+
>+		/* cdw10 and cdw11 represent starting lba */
>+		cmd->cdw10 = slba & 0xffffffff;
>+		cmd->cdw11 = slba >> 32;
>+		/* cdw12 represent number of lba's for read/write */
>+		cmd->cdw12 = nlb;
>+		if (do_fixed || nonvec) {
>+			cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
>+			cmd->data_len = vecs[i].iov_len;
>+		} else {
>+			cmd->addr = (__u64)(uintptr_t)&vecs[i];
>+			cmd->data_len = 1;
>+		}
>+		cmd->nsid = nsid;
>+
>+		offset += BS;
>+	}
>+
>+	ret = io_uring_submit(ring);
>+	if (ret != BUFFERS) {
>+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+		goto err;
>+	}
>+
>+	for (i = 0; i < BUFFERS; i++) {
>+		ret = io_uring_wait_cqe(ring, &cqe);
>+		if (ret) {
>+			fprintf(stderr, "wait_cqe=%d\n", ret);
>+			goto err;
>+		}
>+		if (cqe->res != 0) {
>+			if (!no_pt) {
>+				no_pt = 1;
>+				goto skip;
>+			}
>+			fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
>+			goto err;
>+		}
>+		io_uring_cqe_seen(ring, cqe);
>+		if (!write) {
>+			int index = cqe->user_data & 0xffffffff;
>+			void *buf = vecs[index].iov_base;
>+			off_t voff = cqe->user_data >> 32;
>+
>+			if (verify_buf(tc, buf, voff))
>+				goto err;
>+		}
>+	}
>+
>+	if (fixed) {
>+		ret = io_uring_unregister_buffers(ring);
>+		if (ret) {
>+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+	if (sqthread) {
>+		ret = io_uring_unregister_files(ring);
>+		if (ret) {
>+			fprintf(stderr, "file unreg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+
>+skip:
>+	close(fd);
>+	return 0;
>+err:
>+	if (fd != -1)
>+		close(fd);
>+	return 1;
>+}
>+
>+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
>+		   int fixed, int nonvec)
>+{
>+	struct io_uring ring;
>+	int ret, ring_flags = 0;
>+
>+	ring_flags |= IORING_SETUP_SQE128;
>+	ring_flags |= IORING_SETUP_CQE32;
>+	ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
>+
>+	if (sqthread)
>+		ring_flags |= IORING_SETUP_SQPOLL;
>+
>+	ret = t_create_ring(64, &ring, ring_flags);
>+	if (ret == T_SETUP_SKIP)
>+		return 0;
>+	if (ret != T_SETUP_OK) {
>+		if (ret == -EINVAL) {
>+			no_pt = 1;
>+			return T_SETUP_SKIP;
>+		}
>+		fprintf(stderr, "ring create failed: %d\n", ret);
>+		return 1;
>+	}
>+
>+	ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
>+	io_uring_queue_exit(&ring);
>+
>+	return ret;
>+}
>+
>+static int provide_buffers(struct io_uring *ring)
>+{
>+	struct io_uring_sqe *sqe;
>+	struct io_uring_cqe *cqe;
>+	int ret, i;
>+
>+	for (i = 0; i < BUFFERS; i++) {
>+		sqe = io_uring_get_sqe(ring);
>+		io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
>+						vecs[i].iov_len, 1, 1, i);
>+	}
>+
>+	ret = io_uring_submit(ring);
>+	if (ret != BUFFERS) {
>+		fprintf(stderr, "submit: %d\n", ret);
>+		return 1;
>+	}
>+
>+	for (i = 0; i < BUFFERS; i++) {
>+		ret = io_uring_wait_cqe(ring, &cqe);
>+		if (cqe->res < 0) {
>+			fprintf(stderr, "cqe->res=%d\n", cqe->res);
>+			return 1;
>+		}
>+		io_uring_cqe_seen(ring, cqe);
>+	}
>+
>+	return 0;
>+}
>+
>+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
>+		     int fixed, int buf_select)
>+{
>+	struct io_uring_sqe *sqe;
>+	struct io_uring_cqe *cqe;
>+	int open_flags;
>+	int i, fd = -1, ret;
>+	off_t offset;
>+
>+	if (buf_select) {
>+		write = 0;
>+		fixed = 0;
>+	}
>+	if (buf_select && provide_buffers(ring))
>+		return 1;
>+
>+	if (write)
>+		open_flags = O_WRONLY;
>+	else
>+		open_flags = O_RDONLY;
>+	open_flags |= O_DIRECT;
>+
>+	if (fixed) {
>+		ret = t_register_buffers(ring, vecs, BUFFERS);
>+		if (ret == T_SETUP_SKIP)
>+			return 0;
>+		if (ret != T_SETUP_OK) {
>+			fprintf(stderr, "buffer reg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+	fd = open(file, open_flags);
>+	if (fd < 0) {
>+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
>+			return 0;
>+		perror("file open");
>+		goto err;
>+	}
>+	if (sqthread) {
>+		ret = io_uring_register_files(ring, &fd, 1);
>+		if (ret) {
>+			fprintf(stderr, "file reg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+
>+	offset = 0;
>+	for (i = 0; i < BUFFERS; i++) {
>+		sqe = io_uring_get_sqe(ring);
>+		if (!sqe) {
>+			fprintf(stderr, "sqe get failed\n");
>+			goto err;
>+		}
>+		offset = BS * (rand() % BUFFERS);
>+		if (write) {
>+			int do_fixed = fixed;
>+			int use_fd = fd;
>+
>+			if (sqthread)
>+				use_fd = 0;
>+			if (fixed && (i & 1))
>+				do_fixed = 0;
>+			if (do_fixed) {
>+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+								vecs[i].iov_len,
>+								offset, i);
>+			} else {
>+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+								offset);
>+			}
>+		} else {
>+			int do_fixed = fixed;
>+			int use_fd = fd;
>+
>+			if (sqthread)
>+				use_fd = 0;
>+			if (fixed && (i & 1))
>+				do_fixed = 0;
>+			if (do_fixed) {
>+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+								vecs[i].iov_len,
>+								offset, i);
>+			} else {
>+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+								offset);
>+			}
>+
>+		}
>+		if (sqthread)
>+			sqe->flags |= IOSQE_FIXED_FILE;
>+		if (buf_select) {
>+			sqe->flags |= IOSQE_BUFFER_SELECT;
>+			sqe->buf_group = buf_select;
>+			sqe->user_data = i;
>+		}
>+	}
>+
>+	ret = io_uring_submit(ring);
>+	if (ret != BUFFERS) {
>+		ret = io_uring_peek_cqe(ring, &cqe);
>+		if (!ret && cqe->res == -EOPNOTSUPP) {
>+			no_iopoll = 1;
>+			io_uring_cqe_seen(ring, cqe);
>+			goto out;
>+		}
>+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+		goto err;
>+	}
>+
>+	for (i = 0; i < BUFFERS; i++) {
>+		ret = io_uring_wait_cqe(ring, &cqe);
>+		if (ret) {
>+			fprintf(stderr, "wait_cqe=%d\n", ret);
>+			goto err;
>+		} else if (cqe->res == -EOPNOTSUPP) {
>+			fprintf(stdout, "File/device/fs doesn't support polled IO\n");
>+			no_iopoll = 1;
>+			goto out;
>+		} else if (cqe->res != BS) {
>+			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
>+			goto err;
>+		}
>+		io_uring_cqe_seen(ring, cqe);
>+	}
>+
>+	if (fixed) {
>+		ret = io_uring_unregister_buffers(ring);
>+		if (ret) {
>+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+	if (sqthread) {
>+		ret = io_uring_unregister_files(ring);
>+		if (ret) {
>+			fprintf(stderr, "file unreg failed: %d\n", ret);
>+			goto err;
>+		}
>+	}
>+
>+out:
>+	close(fd);
>+	return 0;
>+err:
>+	if (fd != -1)
>+		close(fd);
>+	return 1;
>+}
>+
>+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
>+		   int buf_select, int defer)
>+{
>+	struct io_uring ring;
>+	int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
>+
>+	if (no_iopoll)
>+		return 0;
>+
>+	if (defer)
>+		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
>+			      IORING_SETUP_DEFER_TASKRUN;
>+
>+	ret = t_create_ring(64, &ring, ring_flags);
>+	if (ret == T_SETUP_SKIP)
>+		return 0;
>+	if (ret != T_SETUP_OK) {
>+		fprintf(stderr, "ring create failed: %d\n", ret);
>+		return 1;
>+	}
>+	ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
>+	io_uring_queue_exit(&ring);
>+	return ret;
>+}
>+
>+int main(int argc, char *argv[])
>+{
>+	int i, ret;
>+	char buf[256];
>+	char *fname;
>+
>+	if (argc > 1) {
>+		fname = argv[1];
>+	} else {
>+		srand((unsigned)time(NULL));
>+		snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
>+			(unsigned)rand(), (unsigned)getpid());
>+		fname = buf;
>+		t_create_file(fname, FILE_SIZE);
>+	}
>+
>+	vecs = t_create_buffers(BUFFERS, BS);
>+
>+	for (i = 0; i < 16; i++) {
>+		int write = (i & 1) != 0;
>+		int sqthread = (i & 2) != 0;
>+		int fixed = (i & 4) != 0;
>+		int buf_select = (i & 8) != 0;
>+		int defer = (i & 16) != 0;
>+		int nonvec = buf_select;
>+
>+		ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
>+		if (ret) {
>+			fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
>+				write, sqthread, fixed, buf_select, defer);
>+			goto err;
>+		}
>+		if (no_iopoll)
>+			break;
>+
>+		ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
>+		if (no_pt)
>+			break;
>+		if (ret) {
>+			fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
>+				write, sqthread, fixed, nonvec);
>+			goto err;
>+		}
>+	}
>+
>+	if (fname != argv[1])
>+		unlink(fname);
>+	return ret;
>+err:
>+	if (fname != argv[1])
>+		unlink(fname);
>+	return T_EXIT_FAIL;
>+}

This patch mostly looks fine. But the code here seems to be largely
duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
Can we consider adding the hybrid poll test as part of the existing
tests? It seems it would only require passing an extra flag during
ring setup, along the lines of the sketch below.
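
Hypothetical, untested sketch (the function name is made up; it reuses
the t_create_ring/__test_iopoll_io helpers from this patch) of what the
reworked entry point could look like:

	static int test_iopoll_flags(const char *fname, int write, int sqthread,
				     int fixed, int buf_select, int defer,
				     unsigned int extra_flags)
	{
		struct io_uring ring;
		int ret, ring_flags = IORING_SETUP_IOPOLL | extra_flags;

		if (defer)
			ring_flags |= IORING_SETUP_SINGLE_ISSUER |
				      IORING_SETUP_DEFER_TASKRUN;

		ret = t_create_ring(64, &ring, ring_flags);
		if (ret == T_SETUP_SKIP)
			return 0;
		if (ret != T_SETUP_OK) {
			fprintf(stderr, "ring create failed: %d\n", ret);
			return 1;
		}
		ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed,
				       buf_select);
		io_uring_queue_exit(&ring);
		return ret;
	}

The main loop would then run each combination twice, once with
extra_flags == 0 and once with IORING_SETUP_HYBRID_IOPOLL.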

>-- 
>2.34.1
>

