Message-ID: <20241111123650.1857526-1-xue01.he@samsung.com>
Date: Mon, 11 Nov 2024 20:36:50 +0800
From: hexue <xue01.he@...sung.com>
To: axboe@...nel.dk, asml.silence@...il.com
Cc: io-uring@...r.kernel.org, linux-kernel@...r.kernel.org, hexue
<xue01.he@...sung.com>
Subject: [PATCH liburing] test: add test cases for hybrid iopoll
Add a test file for hybrid iopoll to make sure it works safely. Test
cases include basic read/write tests, run in normal iopoll mode and
passthrough mode respectively.
Signed-off-by: hexue <xue01.he@...sung.com>
---
man/io_uring_setup.2 | 10 +-
src/include/liburing/io_uring.h | 3 +
test/Makefile | 1 +
test/iopoll-hybridpoll.c | 544 ++++++++++++++++++++++++++++++++
4 files changed, 557 insertions(+), 1 deletion(-)
create mode 100644 test/iopoll-hybridpoll.c
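
Not part of the patch, just for reviewers: a minimal sketch of how an
application might create a ring with the new flag. This assumes a kernel
with hybrid iopoll support and a device configured for polling; error
handling is trimmed down, and older kernels are expected to reject the
unknown flag at setup time.

#include <stdio.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	/* hybrid poll is only valid on top of classic iopoll */
	unsigned int flags = IORING_SETUP_IOPOLL |
			     IORING_SETUP_HYBRID_IOPOLL;
	int ret;

	ret = io_uring_queue_init(8, &ring, flags);
	if (ret < 0) {
		/* e.g. -EINVAL from a kernel without the flag */
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}
	io_uring_queue_exit(&ring);
	return 0;
}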
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 2f87783..8cfafdc 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
parameter set to the desired number of polling queues. The polling queues
will be shared appropriately between the CPUs in the system, if the number
is less than the number of online CPU threads.
-
+.TP
+.B IORING_SETUP_HYBRID_IOPOLL
+This flag must be set together with the
+.B IORING_SETUP_IOPOLL
+flag. Hybrid poll is a feature based on iopoll and can be a good
+compromise when running on a single thread: it offers higher performance
+than IRQ-driven IO and lower CPU utilization than pure polling. Like
+.BR IORING_SETUP_IOPOLL ,
+this feature requires the device to support polling.
.TP
.B IORING_SETUP_SQPOLL
When this flag is specified, a kernel thread is created to perform
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 20bc570..d16364c 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
*/
#define IORING_SETUP_NO_SQARRAY (1U << 16)
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
diff --git a/test/Makefile b/test/Makefile
index dfbbcbe..ea9452c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -116,6 +116,7 @@ test_srcs := \
iopoll.c \
iopoll-leak.c \
iopoll-overflow.c \
+ iopoll-hybridpoll.c \
io_uring_enter.c \
io_uring_passthrough.c \
io_uring_register.c \
diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
new file mode 100644
index 0000000..d7c08ae
--- /dev/null
+++ b/test/iopoll-hybridpoll.c
@@ -0,0 +1,544 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: basic read/write tests with
+ * hybrid polled IO, include iopoll and io_uring
+ * passthrough.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+#include "nvme.h"
+
+#define FILE_SIZE (128 * 1024)
+#define BS 4096
+#define BUFFERS (FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int no_pt, no_iopoll;
+
+static int fill_pattern(int tc)
+{
+ unsigned int val, *ptr;
+ int i, j;
+ int u_in_buf = BS / sizeof(val);
+
+ val = (tc / 2) * FILE_SIZE;
+ for (i = 0; i < BUFFERS; i++) {
+ ptr = vecs[i].iov_base;
+ for (j = 0; j < u_in_buf; j++) {
+ *ptr = val;
+ val++;
+ ptr++;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_buf(int tc, void *buf, off_t off)
+{
+ int i, u_in_buf = BS / sizeof(unsigned int);
+ unsigned int *ptr;
+
+ off /= sizeof(unsigned int);
+ off += (tc / 2) * FILE_SIZE;
+ ptr = buf;
+ for (i = 0; i < u_in_buf; i++) {
+ if (off != *ptr) {
+ fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
+ (unsigned long long) off);
+ return 1;
+ }
+ ptr++;
+ off++;
+ }
+
+ return 0;
+}
+
+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
+ int sqthread, int fixed, int nonvec)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct nvme_uring_cmd *cmd;
+ int open_flags;
+ int do_fixed;
+ int i, ret, fd = -1;
+ off_t offset;
+ __u64 slba;
+ __u32 nlb;
+
+ if (write)
+ open_flags = O_WRONLY;
+ else
+ open_flags = O_RDONLY;
+
+ if (fixed) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ fd = open(file, open_flags);
+ if (fd < 0) {
+ if (errno == EACCES || errno == EPERM)
+ return T_EXIT_SKIP;
+ perror("file open");
+ goto err;
+ }
+
+ if (sqthread) {
+ ret = io_uring_register_files(ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "file reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ if (write)
+ fill_pattern(tc);
+
+ offset = 0;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ if (write) {
+ int use_fd = fd;
+
+ do_fixed = fixed;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else if (nonvec) {
+ io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len, offset);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else {
+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+ offset);
+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+ }
+ } else {
+ int use_fd = fd;
+
+ do_fixed = fixed;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else if (nonvec) {
+ io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len, offset);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else {
+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+ offset);
+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+ }
+ }
+ sqe->opcode = IORING_OP_URING_CMD;
+ if (do_fixed)
+ sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
+ sqe->user_data = ((uint64_t)offset << 32) | i;
+ if (sqthread)
+ sqe->flags |= IOSQE_FIXED_FILE;
+
+ cmd = (struct nvme_uring_cmd *)sqe->cmd;
+ memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+ cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
+
+ slba = offset >> lba_shift;
+ nlb = (BS >> lba_shift) - 1;
+
+ /* cdw10 and cdw11 represent starting lba */
+ cmd->cdw10 = slba & 0xffffffff;
+ cmd->cdw11 = slba >> 32;
+ /* cdw12 represent number of lba's for read/write */
+ cmd->cdw12 = nlb;
+ if (do_fixed || nonvec) {
+ cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
+ cmd->data_len = vecs[i].iov_len;
+ } else {
+ cmd->addr = (__u64)(uintptr_t)&vecs[i];
+ cmd->data_len = 1;
+ }
+ cmd->nsid = nsid;
+
+ offset += BS;
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ }
+ if (cqe->res != 0) {
+ if (!no_pt) {
+ no_pt = 1;
+ goto skip;
+ }
+ fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ if (!write) {
+ int index = cqe->user_data & 0xffffffff;
+ void *buf = vecs[index].iov_base;
+ off_t voff = cqe->user_data >> 32;
+
+ if (verify_buf(tc, buf, voff))
+ goto err;
+ }
+ }
+
+ if (fixed) {
+ ret = io_uring_unregister_buffers(ring);
+ if (ret) {
+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ if (sqthread) {
+ ret = io_uring_unregister_files(ring);
+ if (ret) {
+ fprintf(stderr, "file unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+skip:
+ close(fd);
+ return 0;
+err:
+ if (fd != -1)
+ close(fd);
+ return 1;
+}
+
+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
+ int fixed, int nonvec)
+{
+ struct io_uring ring;
+ int ret, ring_flags = 0;
+
+ ring_flags |= IORING_SETUP_SQE128;
+ ring_flags |= IORING_SETUP_CQE32;
+ ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
+
+ if (sqthread)
+ ring_flags |= IORING_SETUP_SQPOLL;
+
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ if (ret == -EINVAL) {
+ no_pt = 1;
+ return T_SETUP_SKIP;
+ }
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+
+ ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
+ io_uring_queue_exit(&ring);
+
+ return ret;
+}
+
+static int provide_buffers(struct io_uring *ring)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int ret, i;
+
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
+ vecs[i].iov_len, 1, 1, i);
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit: %d\n", ret);
+ return 1;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret || cqe->res < 0) {
+			fprintf(stderr, "wait_cqe=%d res=%d\n", ret, ret ? 0 : cqe->res);
+ return 1;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ return 0;
+}
+
+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
+ int fixed, int buf_select)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int open_flags;
+ int i, fd = -1, ret;
+ off_t offset;
+
+ if (buf_select) {
+ write = 0;
+ fixed = 0;
+ }
+ if (buf_select && provide_buffers(ring))
+ return 1;
+
+ if (write)
+ open_flags = O_WRONLY;
+ else
+ open_flags = O_RDONLY;
+ open_flags |= O_DIRECT;
+
+ if (fixed) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ fd = open(file, open_flags);
+ if (fd < 0) {
+ if (errno == EINVAL || errno == EPERM || errno == EACCES)
+ return 0;
+ perror("file open");
+ goto err;
+ }
+ if (sqthread) {
+ ret = io_uring_register_files(ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "file reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ offset = 0;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ offset = BS * (rand() % BUFFERS);
+ if (write) {
+ int do_fixed = fixed;
+ int use_fd = fd;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+ offset);
+ }
+ } else {
+ int do_fixed = fixed;
+ int use_fd = fd;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+ offset);
+ }
+
+ }
+ if (sqthread)
+ sqe->flags |= IOSQE_FIXED_FILE;
+ if (buf_select) {
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+ sqe->buf_group = buf_select;
+ sqe->user_data = i;
+ }
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ ret = io_uring_peek_cqe(ring, &cqe);
+ if (!ret && cqe->res == -EOPNOTSUPP) {
+ no_iopoll = 1;
+ io_uring_cqe_seen(ring, cqe);
+ goto out;
+ }
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ } else if (cqe->res == -EOPNOTSUPP) {
+ fprintf(stdout, "File/device/fs doesn't support polled IO\n");
+ no_iopoll = 1;
+ goto out;
+ } else if (cqe->res != BS) {
+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ if (fixed) {
+ ret = io_uring_unregister_buffers(ring);
+ if (ret) {
+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ if (sqthread) {
+ ret = io_uring_unregister_files(ring);
+ if (ret) {
+ fprintf(stderr, "file unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+out:
+ close(fd);
+ return 0;
+err:
+ if (fd != -1)
+ close(fd);
+ return 1;
+}
+
+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
+ int buf_select, int defer)
+{
+ struct io_uring ring;
+ int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
+
+ if (no_iopoll)
+ return 0;
+
+ if (defer)
+ ring_flags |= IORING_SETUP_SINGLE_ISSUER |
+ IORING_SETUP_DEFER_TASKRUN;
+
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+ ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
+ io_uring_queue_exit(&ring);
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, ret;
+ char buf[256];
+ char *fname;
+
+ if (argc > 1) {
+ fname = argv[1];
+ } else {
+ srand((unsigned)time(NULL));
+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+ (unsigned)rand(), (unsigned)getpid());
+ fname = buf;
+ t_create_file(fname, FILE_SIZE);
+ }
+
+ vecs = t_create_buffers(BUFFERS, BS);
+
+	for (i = 0; i < 32; i++) {
+ int write = (i & 1) != 0;
+ int sqthread = (i & 2) != 0;
+ int fixed = (i & 4) != 0;
+ int buf_select = (i & 8) != 0;
+ int defer = (i & 16) != 0;
+ int nonvec = buf_select;
+
+ ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
+ if (ret) {
+ fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
+ write, sqthread, fixed, buf_select, defer);
+ goto err;
+ }
+ if (no_iopoll)
+ break;
+
+ ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
+ if (no_pt)
+ break;
+ if (ret) {
+ fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
+ write, sqthread, fixed, nonvec);
+ goto err;
+ }
+ }
+
+ if (fname != argv[1])
+ unlink(fname);
+ return ret;
+err:
+ if (fname != argv[1])
+ unlink(fname);
+ return T_EXIT_FAIL;
+}
--
2.34.1