[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240828114958.29422-1-quic_mapa@quicinc.com>
Date: Wed, 28 Aug 2024 17:19:58 +0530
From: Manish Pandey <quic_mapa@...cinc.com>
To: Jens Axboe <axboe@...nel.dk>
CC: <linux-block@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<quic_nitirawa@...cinc.com>, <quic_bhaskarv@...cinc.com>,
<quic_narepall@...cinc.com>, <quic_rampraka@...cinc.com>,
<quic_mapa@...cinc.com>, <quic_cang@...cinc.com>,
<quic_nguyenb@...cinc.com>
Subject: [PATCH] blk-mq: Allow complete locally if capacities are different
Commit af550e4c9682 ("block/blk-mq: Don't complete locally if
capacities are different") allows a request to be completed locally
only if the submission and completion CPUs have the same capacity.
For optimal I/O load balancing, or to avoid contention between the
submission path and the completion path, a user may need to complete
I/O requests from large-capacity CPU(s) on small-capacity CPU(s), or
vice versa.
Hence introduce a QUEUE_FLAG_ALLOW_DIFF_CAPACITY block queue flag to let
the user decide whether a request is completed locally or needs an IPI
even if the capacities of the requesting and completing CPUs are
different.
This gives the user the flexibility to choose the best CPU for request
completion, and so get the best performance on their system.
Link: https://lore.kernel.org/all/66912a22-540d-4b9a-bd06-cce55b9ad304@quicinc.com/T/
Co-developed-by: Can Guo <quic_cang@...cinc.com>
Co-developed-by: Nitin Rawat <quic_nitirawa@...cinc.com>
Signed-off-by: Manish Pandey <quic_mapa@...cinc.com>
---
block/blk-mq-debugfs.c | 1 +
block/blk-mq.c | 3 ++-
block/blk-sysfs.c | 12 ++++++++++--
include/linux/blkdev.h | 1 +
4 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a8442..af048dad9667 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -93,6 +93,7 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
QUEUE_FLAG_NAME(HCTX_ACTIVE),
QUEUE_FLAG_NAME(SQ_SCHED),
+ QUEUE_FLAG_NAME(ALLOW_DIFF_CAPACITY),
};
#undef QUEUE_FLAG_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa28157b1aaf..1584312d870a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1164,7 +1164,8 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
if (cpu == rq->mq_ctx->cpu ||
(!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
- cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
+ (test_bit(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, &rq->q->queue_flags) ||
+ cpus_equal_capacity(cpu, rq->mq_ctx->cpu))))
return false;
/* don't try to IPI to an offline CPU */
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 60116d13cb80..37d6ab325180 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -300,8 +300,9 @@ static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
+ bool allow = test_bit(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, &disk->queue->queue_flags);
- return queue_var_show(set << force, page);
+ return queue_var_show((set << force) | (allow << set), page);
}
static ssize_t
@@ -316,15 +317,22 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
if (ret < 0)
return ret;
- if (val == 2) {
+ if (val == 3) {
+ blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_set(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
+ } else if (val == 2) {
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
} else if (val == 1) {
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
} else if (val == 0) {
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
}
#endif
return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7664d593486..902fb726ebe1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -602,6 +602,7 @@ enum {
QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */
QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
+ QUEUE_FLAG_ALLOW_DIFF_CAPACITY, /* complete on different capacity CPU-group */
QUEUE_FLAG_MAX
};
--
2.17.1
Powered by blists - more mailing lists