Message-ID: <20241112104406.2rltxkliwhksn3hw@green245>
Date: Tue, 12 Nov 2024 16:14:06 +0530
From: Anuj Gupta <anuj20.g@...sung.com>
To: hexue <xue01.he@...sung.com>
Cc: axboe@...nel.dk, asml.silence@...il.com, io-uring@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH liburing] test: add test cases for hybrid iopoll
On 11/11/24 08:36PM, hexue wrote:
>Add a test file for hybrid iopoll to make sure it works safely. Test
>cases include basic read/write tests, run in normal iopoll mode and
>passthrough mode respectively.
>
>Signed-off-by: hexue <xue01.he@...sung.com>
>---
> man/io_uring_setup.2 | 10 +-
> src/include/liburing/io_uring.h | 3 +
> test/Makefile | 1 +
> test/iopoll-hybridpoll.c | 544 ++++++++++++++++++++++++++++++++
> 4 files changed, 557 insertions(+), 1 deletion(-)
> create mode 100644 test/iopoll-hybridpoll.c
>
>diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>index 2f87783..8cfafdc 100644
>--- a/man/io_uring_setup.2
>+++ b/man/io_uring_setup.2
>@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
> parameter set to the desired number of polling queues. The polling queues
> will be shared appropriately between the CPUs in the system, if the number
> is less than the number of online CPU threads.
>-
>+.TP
>+.B IORING_SETUP_HYBRID_IOPOLL
>+This flag must setup with
s/<must setup with>/<must be used with>
>+.B IORING_SETUP_IOPOLL
>+flag. hybrid poll is a new
s/hybrid/Hybrid
"new" is probably not required here
>+feature baed on iopoll, this could be a suboptimal solution when running
s/baed/based
s/<this could>/<which might>
>+on a single thread, it offers higher performance than IRQ and lower CPU
s/<, it>/<. It>
>+utilization than polling. Similarly, this feature also requires the devices
>+to support polling configuration.
This feature would work if a device doesn't have polled queues, right?
The performance might be suboptimal in that case, but userspace won't
get any errors.
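For reference, a minimal setup helper with the new flag could look like
the below (untested sketch; setup_hybrid_ring() is just an illustration,
it falls back to plain iopoll on kernels that don't know the flag):

#include "liburing.h"

static int setup_hybrid_ring(struct io_uring *ring)
{
	int ret;

	/* hybrid poll is only valid together with IORING_SETUP_IOPOLL */
	ret = io_uring_queue_init(64, ring, IORING_SETUP_IOPOLL |
					    IORING_SETUP_HYBRID_IOPOLL);
	if (ret == -EINVAL)
		/* kernel without hybrid poll support, use plain iopoll */
		ret = io_uring_queue_init(64, ring, IORING_SETUP_IOPOLL);
	return ret;
}
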
> .TP
> .B IORING_SETUP_SQPOLL
> When this flag is specified, a kernel thread is created to perform
>diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
>index 20bc570..d16364c 100644
>--- a/src/include/liburing/io_uring.h
>+++ b/src/include/liburing/io_uring.h
>@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
> */
> #define IORING_SETUP_NO_SQARRAY (1U << 16)
>
>+/* Use hybrid poll in iopoll process */
>+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
>+
> enum io_uring_op {
> IORING_OP_NOP,
> IORING_OP_READV,
>diff --git a/test/Makefile b/test/Makefile
>index dfbbcbe..ea9452c 100644
>--- a/test/Makefile
>+++ b/test/Makefile
>@@ -116,6 +116,7 @@ test_srcs := \
> iopoll.c \
> iopoll-leak.c \
> iopoll-overflow.c \
>+ iopoll-hybridpoll.c \
> io_uring_enter.c \
> io_uring_passthrough.c \
> io_uring_register.c \
>diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
>new file mode 100644
>index 0000000..d7c08ae
>--- /dev/null
>+++ b/test/iopoll-hybridpoll.c
>@@ -0,0 +1,544 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Description: basic read/write tests with
>+ * hybrid polled IO, include iopoll and io_uring
>+ * passthrough.
>+ */
>+#include <errno.h>
>+#include <stdio.h>
>+#include <unistd.h>
>+#include <stdlib.h>
>+#include <string.h>
>+#include <fcntl.h>
>+#include <sys/types.h>
>+#include <poll.h>
>+#include <sys/eventfd.h>
>+#include <sys/resource.h>
>+#include "helpers.h"
>+#include "liburing.h"
>+#include "../src/syscall.h"
>+#include "nvme.h"
>+
>+#define FILE_SIZE (128 * 1024)
>+#define BS 4096
>+#define BUFFERS (FILE_SIZE / BS)
>+
>+static struct iovec *vecs;
>+static int no_pt, no_iopoll;
>+
>+static int fill_pattern(int tc)
>+{
>+ unsigned int val, *ptr;
>+ int i, j;
>+ int u_in_buf = BS / sizeof(val);
>+
>+ val = (tc / 2) * FILE_SIZE;
>+ for (i = 0; i < BUFFERS; i++) {
>+ ptr = vecs[i].iov_base;
>+ for (j = 0; j < u_in_buf; j++) {
>+ *ptr = val;
>+ val++;
>+ ptr++;
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+static int verify_buf(int tc, void *buf, off_t off)
>+{
>+ int i, u_in_buf = BS / sizeof(unsigned int);
>+ unsigned int *ptr;
>+
>+ off /= sizeof(unsigned int);
>+ off += (tc / 2) * FILE_SIZE;
>+ ptr = buf;
>+ for (i = 0; i < u_in_buf; i++) {
>+ if (off != *ptr) {
>+ fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
>+ (unsigned long long) off);
>+ return 1;
>+ }
>+ ptr++;
>+ off++;
>+ }
>+
>+ return 0;
>+}
>+
>+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
>+ int sqthread, int fixed, int nonvec)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ struct nvme_uring_cmd *cmd;
>+ int open_flags;
>+ int do_fixed;
>+ int i, ret, fd = -1;
>+ off_t offset;
>+ __u64 slba;
>+ __u32 nlb;
>+
>+ if (write)
>+ open_flags = O_WRONLY;
>+ else
>+ open_flags = O_RDONLY;
>+
>+ if (fixed) {
>+ ret = t_register_buffers(ring, vecs, BUFFERS);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "buffer reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ fd = open(file, open_flags);
>+ if (fd < 0) {
>+ if (errno == EACCES || errno == EPERM)
>+ return T_EXIT_SKIP;
>+ perror("file open");
>+ goto err;
>+ }
>+
>+ if (sqthread) {
>+ ret = io_uring_register_files(ring, &fd, 1);
>+ if (ret) {
>+ fprintf(stderr, "file reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ if (write)
>+ fill_pattern(tc);
>+
>+ offset = 0;
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ if (!sqe) {
>+ fprintf(stderr, "sqe get failed\n");
>+ goto err;
>+ }
>+ if (write) {
>+ int use_fd = fd;
>+
>+ do_fixed = fixed;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else if (nonvec) {
>+ io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len, offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else {
>+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+ }
>+ } else {
>+ int use_fd = fd;
>+
>+ do_fixed = fixed;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else if (nonvec) {
>+ io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len, offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else {
>+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+ }
>+ }
>+ sqe->opcode = IORING_OP_URING_CMD;
>+ if (do_fixed)
>+ sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
>+ sqe->user_data = ((uint64_t)offset << 32) | i;
>+ if (sqthread)
>+ sqe->flags |= IOSQE_FIXED_FILE;
>+
>+ cmd = (struct nvme_uring_cmd *)sqe->cmd;
>+ memset(cmd, 0, sizeof(struct nvme_uring_cmd));
>+
>+ cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
>+
>+ slba = offset >> lba_shift;
>+ nlb = (BS >> lba_shift) - 1;
>+
>+ /* cdw10 and cdw11 represent starting lba */
>+ cmd->cdw10 = slba & 0xffffffff;
>+ cmd->cdw11 = slba >> 32;
>+ /* cdw12 represent number of lba's for read/write */
>+ cmd->cdw12 = nlb;
>+ if (do_fixed || nonvec) {
>+ cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
>+ cmd->data_len = vecs[i].iov_len;
>+ } else {
>+ cmd->addr = (__u64)(uintptr_t)&vecs[i];
>+ cmd->data_len = 1;
>+ }
>+ cmd->nsid = nsid;
>+
>+ offset += BS;
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+ goto err;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (ret) {
>+ fprintf(stderr, "wait_cqe=%d\n", ret);
>+ goto err;
>+ }
>+ if (cqe->res != 0) {
>+ if (!no_pt) {
>+ no_pt = 1;
>+ goto skip;
>+ }
>+ fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
>+ goto err;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ if (!write) {
>+ int index = cqe->user_data & 0xffffffff;
>+ void *buf = vecs[index].iov_base;
>+ off_t voff = cqe->user_data >> 32;
>+
>+ if (verify_buf(tc, buf, voff))
>+ goto err;
>+ }
>+ }
>+
>+ if (fixed) {
>+ ret = io_uring_unregister_buffers(ring);
>+ if (ret) {
>+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ if (sqthread) {
>+ ret = io_uring_unregister_files(ring);
>+ if (ret) {
>+ fprintf(stderr, "file unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+skip:
>+ close(fd);
>+ return 0;
>+err:
>+ if (fd != -1)
>+ close(fd);
>+ return 1;
>+}
>+
>+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
>+ int fixed, int nonvec)
>+{
>+ struct io_uring ring;
>+ int ret, ring_flags = 0;
>+
>+ ring_flags |= IORING_SETUP_SQE128;
>+ ring_flags |= IORING_SETUP_CQE32;
>+ ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
>+
>+ if (sqthread)
>+ ring_flags |= IORING_SETUP_SQPOLL;
>+
>+ ret = t_create_ring(64, &ring, ring_flags);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ if (ret == -EINVAL) {
>+ no_pt = 1;
>+ return T_SETUP_SKIP;
>+ }
>+ fprintf(stderr, "ring create failed: %d\n", ret);
>+ return 1;
>+ }
>+
>+ ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
>+ io_uring_queue_exit(&ring);
>+
>+ return ret;
>+}
>+
>+static int provide_buffers(struct io_uring *ring)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ int ret, i;
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
>+ vecs[i].iov_len, 1, 1, i);
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ fprintf(stderr, "submit: %d\n", ret);
>+ return 1;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (cqe->res < 0) {
>+ fprintf(stderr, "cqe->res=%d\n", cqe->res);
>+ return 1;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ }
>+
>+ return 0;
>+}
>+
>+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
>+ int fixed, int buf_select)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ int open_flags;
>+ int i, fd = -1, ret;
>+ off_t offset;
>+
>+ if (buf_select) {
>+ write = 0;
>+ fixed = 0;
>+ }
>+ if (buf_select && provide_buffers(ring))
>+ return 1;
>+
>+ if (write)
>+ open_flags = O_WRONLY;
>+ else
>+ open_flags = O_RDONLY;
>+ open_flags |= O_DIRECT;
>+
>+ if (fixed) {
>+ ret = t_register_buffers(ring, vecs, BUFFERS);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "buffer reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ fd = open(file, open_flags);
>+ if (fd < 0) {
>+ if (errno == EINVAL || errno == EPERM || errno == EACCES)
>+ return 0;
>+ perror("file open");
>+ goto err;
>+ }
>+ if (sqthread) {
>+ ret = io_uring_register_files(ring, &fd, 1);
>+ if (ret) {
>+ fprintf(stderr, "file reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ offset = 0;
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ if (!sqe) {
>+ fprintf(stderr, "sqe get failed\n");
>+ goto err;
>+ }
>+ offset = BS * (rand() % BUFFERS);
>+ if (write) {
>+ int do_fixed = fixed;
>+ int use_fd = fd;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ } else {
>+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ }
>+ } else {
>+ int do_fixed = fixed;
>+ int use_fd = fd;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ } else {
>+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ }
>+
>+ }
>+ if (sqthread)
>+ sqe->flags |= IOSQE_FIXED_FILE;
>+ if (buf_select) {
>+ sqe->flags |= IOSQE_BUFFER_SELECT;
>+ sqe->buf_group = buf_select;
>+ sqe->user_data = i;
>+ }
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ ret = io_uring_peek_cqe(ring, &cqe);
>+ if (!ret && cqe->res == -EOPNOTSUPP) {
>+ no_iopoll = 1;
>+ io_uring_cqe_seen(ring, cqe);
>+ goto out;
>+ }
>+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+ goto err;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (ret) {
>+ fprintf(stderr, "wait_cqe=%d\n", ret);
>+ goto err;
>+ } else if (cqe->res == -EOPNOTSUPP) {
>+ fprintf(stdout, "File/device/fs doesn't support polled IO\n");
>+ no_iopoll = 1;
>+ goto out;
>+ } else if (cqe->res != BS) {
>+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
>+ goto err;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ }
>+
>+ if (fixed) {
>+ ret = io_uring_unregister_buffers(ring);
>+ if (ret) {
>+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ if (sqthread) {
>+ ret = io_uring_unregister_files(ring);
>+ if (ret) {
>+ fprintf(stderr, "file unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+out:
>+ close(fd);
>+ return 0;
>+err:
>+ if (fd != -1)
>+ close(fd);
>+ return 1;
>+}
>+
>+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
>+ int buf_select, int defer)
>+{
>+ struct io_uring ring;
>+ int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
>+
>+ if (no_iopoll)
>+ return 0;
>+
>+ if (defer)
>+ ring_flags |= IORING_SETUP_SINGLE_ISSUER |
>+ IORING_SETUP_DEFER_TASKRUN;
>+
>+ ret = t_create_ring(64, &ring, ring_flags);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "ring create failed: %d\n", ret);
>+ return 1;
>+ }
>+ ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
>+ io_uring_queue_exit(&ring);
>+ return ret;
>+}
>+
>+int main(int argc, char *argv[])
>+{
>+ int i, ret;
>+ char buf[256];
>+ char *fname;
>+
>+ if (argc > 1) {
>+ fname = argv[1];
>+ } else {
>+ srand((unsigned)time(NULL));
>+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
>+ (unsigned)rand(), (unsigned)getpid());
>+ fname = buf;
>+ t_create_file(fname, FILE_SIZE);
>+ }
>+
>+ vecs = t_create_buffers(BUFFERS, BS);
>+
>+ for (i = 0; i < 16; i++) {
>+ int write = (i & 1) != 0;
>+ int sqthread = (i & 2) != 0;
>+ int fixed = (i & 4) != 0;
>+ int buf_select = (i & 8) != 0;
>+ int defer = (i & 16) != 0;
>+ int nonvec = buf_select;
>+
>+ ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
>+ if (ret) {
>+ fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
>+ write, sqthread, fixed, buf_select, defer);
>+ goto err;
>+ }
>+ if (no_iopoll)
>+ break;
>+
>+ ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
>+ if (no_pt)
>+ break;
>+ if (ret) {
>+ fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
>+ write, sqthread, fixed, nonvec);
>+ goto err;
>+ }
>+ }
>+
>+ if (fname != argv[1])
>+ unlink(fname);
>+ return ret;
>+err:
>+ if (fname != argv[1])
>+ unlink(fname);
>+ return T_EXIT_FAIL;
>+}
This patch mostly looks fine. But the code here seems to be largely
duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
Can we consider adding the hybrid poll test as part of the existing
tests? It seems it would only require passing an extra flag during
ring setup.
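Something like the below would do (an untested sketch built on the
helpers this patch already uses; a hybrid argument is threaded through
instead of a separate file, and -EINVAL from ring creation is treated
as "kernel too old", same as the passthrough path above):

static int test_iopoll_flags(const char *fname, int write, int sqthread,
			     int fixed, int buf_select, int defer,
			     int hybrid)
{
	struct io_uring ring;
	int ret, ring_flags = IORING_SETUP_IOPOLL;

	if (hybrid)
		ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
	if (defer)
		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
			      IORING_SETUP_DEFER_TASKRUN;

	ret = t_create_ring(64, &ring, ring_flags);
	if (ret == T_SETUP_SKIP || (hybrid && ret == -EINVAL))
		return 0;
	if (ret != T_SETUP_OK) {
		fprintf(stderr, "ring create failed: %d\n", ret);
		return 1;
	}
	ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
	io_uring_queue_exit(&ring);
	return ret;
}

main() in iopoll.c could then run its existing case loop twice, with
hybrid set to 0 and 1, and test/io_uring_passthrough.c could be
extended the same way.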
>--
>2.34.1
>