lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240828114958.29422-1-quic_mapa@quicinc.com>
Date: Wed, 28 Aug 2024 17:19:58 +0530
From: Manish Pandey <quic_mapa@...cinc.com>
To: Jens Axboe <axboe@...nel.dk>
CC: <linux-block@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <quic_nitirawa@...cinc.com>, <quic_bhaskarv@...cinc.com>,
        <quic_narepall@...cinc.com>, <quic_rampraka@...cinc.com>,
        <quic_mapa@...cinc.com>, <quic_cang@...cinc.com>,
        <quic_nguyenb@...cinc.com>
Subject: [PATCH] blk-mq: Allow complete locally if capacities are different

Commit af550e4c9682 ("block/blk-mq: Don't complete locally if
capacities are different") allows a request to be completed locally
only if the submission and completion CPUs have the same capacity.

To achieve optimal I/O load balancing, or to avoid contention between
the submission path and the completion path, a user may need to
complete I/O requests submitted from large-capacity CPU(s) on
small-capacity CPU(s), or vice versa.

Hence introduce a QUEUE_FLAG_ALLOW_DIFF_CAPACITY block queue flag that
lets the user decide whether a request is completed locally or needs an
IPI even when the submitting and completing CPUs have different
capacities. The flag is set by writing 3 to the queue's rq_affinity
attribute. This gives the user the flexibility to choose the best CPU
for completions and so get the best performance for their system.

Link: https://lore.kernel.org/all/66912a22-540d-4b9a-bd06-cce55b9ad304@quicinc.com/T/
Co-developed-by: Can Guo <quic_cang@...cinc.com>
Co-developed-by: Nitin Rawat <quic_nitirawa@...cinc.com>
Signed-off-by: Manish Pandey <quic_mapa@...cinc.com>
---
 block/blk-mq-debugfs.c |  1 +
 block/blk-mq.c         |  3 ++-
 block/blk-sysfs.c      | 12 ++++++++++--
 include/linux/blkdev.h |  1 +
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a8442..af048dad9667 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -93,6 +93,7 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
 	QUEUE_FLAG_NAME(HCTX_ACTIVE),
 	QUEUE_FLAG_NAME(SQ_SCHED),
+	QUEUE_FLAG_NAME(ALLOW_DIFF_CAPACITY),
 };
 #undef QUEUE_FLAG_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa28157b1aaf..1584312d870a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1164,7 +1164,8 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
 	if (cpu == rq->mq_ctx->cpu ||
 	    (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
 	     cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
-	     cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
+	     (test_bit(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, &rq->q->queue_flags) ||
+	      cpus_equal_capacity(cpu, rq->mq_ctx->cpu))))
 		return false;
 
 	/* don't try to IPI to an offline CPU */
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 60116d13cb80..37d6ab325180 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -300,8 +300,9 @@ static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
 {
 	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
 	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
+	bool allow = test_bit(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, &disk->queue->queue_flags);
 
-	return queue_var_show(set << force, page);
+	return queue_var_show((set << force) | (allow << set), page);
 }
 
 static ssize_t
@@ -316,15 +317,22 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
 	if (ret < 0)
 		return ret;
 
-	if (val == 2) {
+	if (val == 3) {
+		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+		blk_queue_flag_set(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
+	} else if (val == 2) {
 		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
 		blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+		blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
 	} else if (val == 1) {
 		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
 		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+		blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
 	} else if (val == 0) {
 		blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
 		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+		blk_queue_flag_clear(QUEUE_FLAG_ALLOW_DIFF_CAPACITY, q);
 	}
 #endif
 	return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7664d593486..902fb726ebe1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -602,6 +602,7 @@ enum {
 	QUEUE_FLAG_RQ_ALLOC_TIME,	/* record rq->alloc_time_ns */
 	QUEUE_FLAG_HCTX_ACTIVE,		/* at least one blk-mq hctx is active */
 	QUEUE_FLAG_SQ_SCHED,		/* single queue style io dispatch */
+	QUEUE_FLAG_ALLOW_DIFF_CAPACITY,	/* complete on different capacity CPU-group */
 	QUEUE_FLAG_MAX
 };
 
-- 
2.17.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ