Message-ID: <20251121015851.3672073-26-ming.lei@redhat.com>
Date: Fri, 21 Nov 2025 09:58:47 +0800
From: Ming Lei <ming.lei@...hat.com>
To: Jens Axboe <axboe@...nel.dk>,
linux-block@...r.kernel.org
Cc: Caleb Sander Mateos <csander@...estorage.com>,
Uday Shankar <ushankar@...estorage.com>,
Stefani Seibold <stefani@...bold.net>,
Andrew Morton <akpm@...ux-foundation.org>,
linux-kernel@...r.kernel.org,
Ming Lei <ming.lei@...hat.com>
Subject: [PATCH V4 25/27] selftests: ublk: handle UBLK_U_IO_FETCH_IO_CMDS

Add support for UBLK_U_IO_FETCH_IO_CMDS to enable efficient batch
fetching of I/O commands using multishot io_uring operations.

Key improvements:
- Implement multishot UBLK_U_IO_FETCH_IO_CMDS for continuous command fetching
- Add fetch buffer management with page-aligned, mlocked buffers
- Process fetched I/O command tags from kernel-provided buffers
- Integrate fetch operations with existing batch I/O infrastructure
- Significantly reduce uring_cmd issue overhead through batching

The implementation uses two fetch buffers per thread with automatic
requeuing to maintain continuous I/O command flow. Each fetch operation
retrieves multiple command tags in a single syscall, dramatically
improving performance compared to individual command fetching.
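
For reference, here is a minimal consumer-side sketch (not part of the
patch) of how the payload of one fetch CQE can be walked. The names
handle_io_tag() and consume_fetched_tags() are hypothetical; the 2-byte
tag layout and the cqe->res byte count follow what batch.c implements:

  #include <stdio.h>
  #include <stdint.h>
  #include <string.h>
  #include <liburing.h>

  /* stand-in for the tgt_ops->queue_io() dispatch done in the selftest */
  static void handle_io_tag(uint16_t tag)
  {
      printf("fetched tag %u\n", (unsigned int)tag);
  }

  /* returns the updated buffer offset, mirroring fetch_buf_off */
  static unsigned int consume_fetched_tags(const struct io_uring_cqe *cqe,
                                           const void *fetch_buf,
                                           unsigned int buf_off)
  {
      const unsigned char *p = (const unsigned char *)fetch_buf + buf_off;
      int i;

      if (cqe->res <= 0)
          return buf_off;

      for (i = 0; i < cqe->res; i += 2) {
          uint16_t tag;

          /* each fetched element is a 2-byte I/O tag */
          memcpy(&tag, p + i, sizeof(tag));
          handle_io_tag(tag);
      }

      /* with IOU_PBUF_RING_INC the buffer is consumed incrementally */
      return buf_off + cqe->res;
  }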

Technical details:
- Fetch buffers are page-aligned and mlocked to speed up tag delivery
- Uses IORING_URING_CMD_MULTISHOT for continuous operation (sketched below)
- Automatic buffer management and requeuing on completion
- Enhanced CQE handling for fetch command completions
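
A minimal sketch (not part of the patch) of the provided-buffer-ring plus
multishot wiring behind the fetch command follows. arm_fetch() and its
parameters are hypothetical, the ublk-specific command payload (queue id,
element size, element count) is elided where batch.c fills it via
ublk_init_batch_cmd(), and UBLK_U_IO_FETCH_IO_CMDS as well as
IORING_URING_CMD_MULTISHOT come from this series' headers:

  #include <errno.h>
  #include <string.h>
  #include <liburing.h>
  #include <linux/ublk_cmd.h>    /* UBLK_U_IO_FETCH_IO_CMDS (this series) */

  static int arm_fetch(struct io_uring *ring, int ublk_ch_fd,
                       void *fetch_buf, unsigned int buf_size,
                       unsigned short bgid)
  {
      struct io_uring_buf_ring *br;
      struct io_uring_sqe *sqe;
      int ret;

      /* single-entry provided-buffer ring, consumed incrementally */
      br = io_uring_setup_buf_ring(ring, 1, bgid, IOU_PBUF_RING_INC, &ret);
      if (!br)
          return ret;

      /* publish the page-aligned (ideally mlocked) fetch buffer */
      io_uring_buf_ring_add(br, fetch_buf, buf_size, 0,
                            io_uring_buf_ring_mask(1), 0);
      io_uring_buf_ring_advance(br, 1);

      sqe = io_uring_get_sqe(ring);
      if (!sqe)
          return -ENOMEM;

      memset(sqe, 0, sizeof(*sqe));
      sqe->opcode = IORING_OP_URING_CMD;
      sqe->fd = ublk_ch_fd;
      sqe->cmd_op = UBLK_U_IO_FETCH_IO_CMDS;
      sqe->rw_flags = IORING_URING_CMD_MULTISHOT;
      sqe->flags = IOSQE_BUFFER_SELECT;
      sqe->buf_group = bgid;
      /* the ublk batch payload (q_id/elem_bytes/nr_elem) is elided here */

      return io_uring_submit(ring);
  }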

Signed-off-by: Ming Lei <ming.lei@...hat.com>
---
tools/testing/selftests/ublk/batch.c | 134 ++++++++++++++++++++++++++-
tools/testing/selftests/ublk/kublk.c | 14 ++-
tools/testing/selftests/ublk/kublk.h | 13 +++
3 files changed, 157 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/ublk/batch.c b/tools/testing/selftests/ublk/batch.c
index e240d4decedf..7db91f910944 100644
--- a/tools/testing/selftests/ublk/batch.c
+++ b/tools/testing/selftests/ublk/batch.c
@@ -140,15 +140,63 @@ void ublk_batch_prepare(struct ublk_thread *t)
t->nr_bufs);
}
+static void free_batch_fetch_buf(struct ublk_thread *t)
+{
+ int i;
+
+ for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) {
+ io_uring_free_buf_ring(&t->ring, t->fetch[i].br, 1, i);
+ munlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size);
+ free(t->fetch[i].fetch_buf);
+ }
+}
+
+static int alloc_batch_fetch_buf(struct ublk_thread *t)
+{
+ /* page-aligned fetch buffer, mlocked to speed up delivery */
+ unsigned pg_sz = getpagesize();
+ unsigned buf_size = round_up(t->dev->dev_info.queue_depth * 2, pg_sz);
+ int ret;
+ int i = 0;
+
+ for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) {
+ t->fetch[i].fetch_buf_size = buf_size;
+
+ if (posix_memalign((void **)&t->fetch[i].fetch_buf, pg_sz,
+ t->fetch[i].fetch_buf_size))
+ return -ENOMEM;
+
+ /* lock fetch buffer page for fast fetching */
+ if (mlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size))
+ ublk_err("%s: can't lock fetch buffer %s\n", __func__,
+ strerror(errno));
+ t->fetch[i].br = io_uring_setup_buf_ring(&t->ring, 1,
+ i, IOU_PBUF_RING_INC, &ret);
+ if (!t->fetch[i].br) {
+ ublk_err("Buffer ring register failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
int ublk_batch_alloc_buf(struct ublk_thread *t)
{
+ int ret;
+
ublk_assert(t->nr_commit_buf < 16);
- return alloc_batch_commit_buf(t);
+
+ ret = alloc_batch_commit_buf(t);
+ if (ret)
+ return ret;
+ return alloc_batch_fetch_buf(t);
}
void ublk_batch_free_buf(struct ublk_thread *t)
{
free_batch_commit_buf(t);
+ free_batch_fetch_buf(t);
}
static void ublk_init_batch_cmd(struct ublk_thread *t, __u16 q_id,
@@ -199,6 +247,76 @@ static void ublk_setup_commit_sqe(struct ublk_thread *t,
cmd->flags |= t->cmd_flags;
}
+static void ublk_batch_queue_fetch(struct ublk_thread *t,
+ struct ublk_queue *q,
+ unsigned short buf_idx)
+{
+ unsigned short nr_elem = t->fetch[buf_idx].fetch_buf_size / 2;
+ struct io_uring_sqe *sqe;
+
+ io_uring_buf_ring_add(t->fetch[buf_idx].br, t->fetch[buf_idx].fetch_buf,
+ t->fetch[buf_idx].fetch_buf_size,
+ 0, 0, 0);
+ io_uring_buf_ring_advance(t->fetch[buf_idx].br, 1);
+
+ ublk_io_alloc_sqes(t, &sqe, 1);
+
+ ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_FETCH_IO_CMDS, 2, nr_elem,
+ buf_idx);
+
+ sqe->rw_flags = IORING_URING_CMD_MULTISHOT;
+ sqe->buf_group = buf_idx;
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+
+ t->fetch[buf_idx].fetch_buf_off = 0;
+}
+
+void ublk_batch_start_fetch(struct ublk_thread *t,
+ struct ublk_queue *q)
+{
+ int i;
+
+ for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++)
+ ublk_batch_queue_fetch(t, q, i);
+}
+
+static unsigned short ublk_compl_batch_fetch(struct ublk_thread *t,
+ struct ublk_queue *q,
+ const struct io_uring_cqe *cqe)
+{
+ unsigned short buf_idx = user_data_to_tag(cqe->user_data);
+ unsigned start = t->fetch[buf_idx].fetch_buf_off;
+ unsigned end = start + cqe->res;
+ void *buf = t->fetch[buf_idx].fetch_buf;
+ int i;
+
+ if (cqe->res < 0)
+ return buf_idx;
+
+ if ((end - start) / 2 > q->q_depth) {
+ ublk_err("%s: fetch duplicated ios offset %u count %u\n", __func__, start, cqe->res);
+
+ for (i = start; i < end; i += 2) {
+ unsigned short tag = *(unsigned short *)(buf + i);
+
+ ublk_err("%u ", tag);
+ }
+ ublk_err("\n");
+ }
+
+ for (i = start; i < end; i += 2) {
+ unsigned short tag = *(unsigned short *)(buf + i);
+
+ if (tag >= q->q_depth)
+ ublk_err("%s: bad tag %u\n", __func__, tag);
+
+ if (q->tgt_ops->queue_io)
+ q->tgt_ops->queue_io(t, q, tag);
+ }
+ t->fetch[buf_idx].fetch_buf_off = end;
+ return buf_idx;
+}
+
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q)
{
unsigned short nr_elem = q->q_depth;
@@ -258,12 +376,26 @@ void ublk_batch_compl_cmd(struct ublk_thread *t,
const struct io_uring_cqe *cqe)
{
unsigned op = user_data_to_op(cqe->user_data);
+ struct ublk_queue *q;
+ unsigned buf_idx;
+ unsigned q_id;
if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS) ||
op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) {
ublk_batch_compl_commit_cmd(t, cqe, op);
return;
}
+
+ /* FETCH command is per queue */
+ q_id = user_data_to_q_id(cqe->user_data);
+ q = &t->dev->q[q_id];
+ buf_idx = ublk_compl_batch_fetch(t, q, cqe);
+
+ if (cqe->res < 0 && cqe->res != -ENOBUFS) {
+ t->state |= UBLKS_T_STOPPING;
+ } else if (!(cqe->flags & IORING_CQE_F_MORE) || cqe->res == -ENOBUFS) {
+ ublk_batch_queue_fetch(t, q, buf_idx);
+ }
}
void ublk_batch_commit_io_cmds(struct ublk_thread *t)
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 6565e804679c..cb329c7aebc4 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -493,6 +493,10 @@ static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flag
int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
int ret;
+ /* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */
+ if (ublk_dev_batch_io(dev))
+ cq_depth += dev->dev_info.queue_depth;
+
ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
IORING_SETUP_COOP_TASKRUN |
IORING_SETUP_SINGLE_ISSUER |
@@ -797,7 +801,7 @@ static void ublk_handle_cqe(struct ublk_thread *t,
unsigned q_id = user_data_to_q_id(cqe->user_data);
unsigned cmd_op = user_data_to_op(cqe->user_data);
- if (cqe->res < 0 && cqe->res != -ENODEV)
+ if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS)
ublk_err("%s: res %d userdata %llx thread state %x\n", __func__,
cqe->res, cqe->user_data, t->state);
@@ -922,9 +926,13 @@ static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_inf
if (!ublk_thread_batch_io(&t)) {
/* submit all io commands to ublk driver */
ublk_submit_fetch_commands(&t);
- } else if (!t.idx) {
+ } else {
+ struct ublk_queue *q = &t.dev->q[t.idx];
+
/* prepare all io commands in the 1st thread context */
- ublk_batch_setup_queues(&t);
+ if (!t.idx)
+ ublk_batch_setup_queues(&t);
+ ublk_batch_start_fetch(&t, q);
}
do {
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 0a355653d64c..222501048c24 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -190,6 +190,13 @@ struct batch_commit_buf {
unsigned short count;
};
+struct batch_fetch_buf {
+ struct io_uring_buf_ring *br;
+ void *fetch_buf;
+ unsigned int fetch_buf_size;
+ unsigned int fetch_buf_off;
+};
+
struct ublk_thread {
struct ublk_dev *dev;
unsigned idx;
@@ -216,6 +223,9 @@ struct ublk_thread {
#define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
struct allocator commit_buf_alloc;
struct batch_commit_buf commit;
+ /* FETCH_IO_CMDS buffer */
+#define UBLKS_T_NR_FETCH_BUF 2
+ struct batch_fetch_buf fetch[UBLKS_T_NR_FETCH_BUF];
struct io_uring ring;
};
@@ -468,6 +478,9 @@ static inline unsigned short ublk_batch_io_buf_idx(
/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
+/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
+void ublk_batch_start_fetch(struct ublk_thread *t,
+ struct ublk_queue *q);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
const struct io_uring_cqe *cqe);
--
2.47.0