Message-Id: <20240806-isolcpus-io-queues-v3-15-da0eecfeaf8b@suse.de>
Date: Tue, 06 Aug 2024 14:06:47 +0200
From: Daniel Wagner <dwagner@...e.de>
To: Jens Axboe <axboe@...nel.dk>, Keith Busch <kbusch@...nel.org>,
Sagi Grimberg <sagi@...mberg.me>, Thomas Gleixner <tglx@...utronix.de>,
Christoph Hellwig <hch@....de>,
"Martin K. Petersen" <martin.petersen@...cle.com>,
John Garry <john.g.garry@...cle.com>, "Michael S. Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>,
Kashyap Desai <kashyap.desai@...adcom.com>,
Sumit Saxena <sumit.saxena@...adcom.com>,
Shivasharan S <shivasharan.srikanteshwara@...adcom.com>,
Chandrakanth patil <chandrakanth.patil@...adcom.com>,
Sathya Prakash Veerichetty <sathya.prakash@...adcom.com>,
Suganath Prabu Subramani <suganath-prabu.subramani@...adcom.com>,
Nilesh Javali <njavali@...vell.com>, GR-QLogic-Storage-Upstream@...vell.com,
Jonathan Corbet <corbet@....net>
Cc: Frederic Weisbecker <frederic@...nel.org>, Mel Gorman <mgorman@...e.de>,
Hannes Reinecke <hare@...e.de>,
Sridhar Balaraman <sbalaraman@...allelwireless.com>,
"brookxu.cn" <brookxu.cn@...il.com>, Ming Lei <ming.lei@...hat.com>,
linux-kernel@...r.kernel.org, linux-block@...r.kernel.org,
linux-nvme@...ts.infradead.org, linux-scsi@...r.kernel.org,
virtualization@...ts.linux.dev, megaraidlinux.pdl@...adcom.com,
mpi3mr-linuxdrv.pdl@...adcom.com, MPT-FusionLinux.pdl@...adcom.com,
storagedev@...rochip.com, linux-doc@...r.kernel.org,
Daniel Wagner <dwagner@...e.de>
Subject: [PATCH v3 15/15] blk-mq: use hk cpus only when isolcpus=io_queue is enabled

When isolcpus=io_queue is enabled, all hardware queues should run on the
housekeeping CPUs only. Thus ignore the affinity mask provided by the
driver. Also we can't use blk_mq_map_queues, because it maps all CPUs to
the first hctx unless a CPU matches the affinity a hctx has been assigned.
For example, with 8 CPUs and an isolcpus=io_queue,2-3,6-7 configuration
(CPUs 2, 3, 6 and 7 isolated; CPUs 0, 1, 4 and 5 housekeeping) the
resulting mapping is:

  queue mapping for /dev/nvme0n1
        hctx0: default 2 3 4 6 7
        hctx1: default 5
        hctx2: default 0
        hctx3: default 1
  PCI name is 00:05.0: nvme0n1
        irq 57 affinity 0-1 effective 1 is_managed:0 nvme0q0
        irq 58 affinity 4 effective 4 is_managed:1 nvme0q1
        irq 59 affinity 5 effective 5 is_managed:1 nvme0q2
        irq 60 affinity 0 effective 0 is_managed:1 nvme0q3
        irq 61 affinity 1 effective 1 is_managed:1 nvme0q4

whereas with blk_mq_hk_map_queues we get:

  queue mapping for /dev/nvme0n1
        hctx0: default 2 4
        hctx1: default 3 5
        hctx2: default 0 6
        hctx3: default 1 7
  PCI name is 00:05.0: nvme0n1
        irq 56 affinity 0-1 effective 1 is_managed:0 nvme0q0
        irq 61 affinity 4 effective 4 is_managed:1 nvme0q1
        irq 62 affinity 5 effective 5 is_managed:1 nvme0q2
        irq 63 affinity 0 effective 0 is_managed:1 nvme0q3
        irq 64 affinity 1 effective 1 is_managed:1 nvme0q4
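
For illustration only (not part of the patch): the stand-alone user-space
sketch below mimics the mapping strategy for the 8 CPU / 4 queue example
above, with CPUs 2-3 and 6-7 treated as isolated. Housekeeping CPUs are
spread over the hardware contexts first and the isolated CPUs are then
assigned round-robin, so no hctx ends up served by isolated CPUs alone.
The CPU count, queue count and isolated[] table are hard-coded assumptions
for the example; the real code uses group_cpus_evenly(), which also takes
CPU topology into account, so the exact per-hctx CPU lists differ from
this toy version.

/* toy model of the hk/isolated queue mapping; compile with any C compiler */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS   8
#define NR_QUEUES 4

/* isolcpus=io_queue,2-3,6-7 */
static const bool isolated[NR_CPUS] = {
	false, false, true, true, false, false, true, true
};

int main(void)
{
	unsigned int mq_map[NR_CPUS];
	unsigned int queue, cpu;

	/* spread housekeeping CPUs evenly over the hardware contexts */
	queue = 0;
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (isolated[cpu])
			continue;
		mq_map[cpu] = queue;
		queue = (queue + 1) % NR_QUEUES;
	}

	/* assign isolated CPUs round-robin to the hardware contexts */
	queue = 0;
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!isolated[cpu])
			continue;
		mq_map[cpu] = queue;
		queue = (queue + 1) % NR_QUEUES;
	}

	/* print the resulting per-hctx CPU lists */
	for (queue = 0; queue < NR_QUEUES; queue++) {
		printf("hctx%u:", queue);
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			if (mq_map[cpu] == queue)
				printf(" %u", cpu);
		printf("\n");
	}
	return 0;
}
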
Signed-off-by: Daniel Wagner <dwagner@...e.de>
---
 block/blk-mq-cpumap.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index c1277763aeeb..7e026c2ffa02 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -60,11 +60,64 @@ unsigned int blk_mq_num_online_queues(unsigned int max_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_num_online_queues);
 
+static bool blk_mq_hk_map_queues(struct blk_mq_queue_map *qmap)
+{
+	struct cpumask *hk_masks;
+	cpumask_var_t isol_mask;
+
+	unsigned int queue, cpu;
+
+	if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+		return false;
+
+	/* map housekeeping cpus to matching hardware context */
+	hk_masks = group_cpus_evenly(qmap->nr_queues);
+	if (!hk_masks)
+		goto fallback;
+
+	for (queue = 0; queue < qmap->nr_queues; queue++) {
+		for_each_cpu(cpu, &hk_masks[queue])
+			qmap->mq_map[cpu] = qmap->queue_offset + queue;
+	}
+
+	kfree(hk_masks);
+
+	/* map isolcpus to hardware context */
+	if (!alloc_cpumask_var(&isol_mask, GFP_KERNEL))
+		goto fallback;
+
+	queue = 0;
+	cpumask_andnot(isol_mask,
+		       cpu_possible_mask,
+		       housekeeping_cpumask(HK_TYPE_IO_QUEUE));
+
+	for_each_cpu(cpu, isol_mask) {
+		qmap->mq_map[cpu] = qmap->queue_offset + queue;
+		queue = (queue + 1) % qmap->nr_queues;
+	}
+
+	free_cpumask_var(isol_mask);
+
+	return true;
+
+fallback:
+	/* map all cpus to hardware context ignoring any affinity */
+	queue = 0;
+	for_each_possible_cpu(cpu) {
+		qmap->mq_map[cpu] = qmap->queue_offset + queue;
+		queue = (queue + 1) % qmap->nr_queues;
+	}
+	return true;
+}
+
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
 {
 	const struct cpumask *masks;
 	unsigned int queue, cpu;
 
+	if (blk_mq_hk_map_queues(qmap))
+		return;
+
 	masks = group_cpus_evenly(qmap->nr_queues);
 	if (!masks) {
 		for_each_possible_cpu(cpu)
@@ -118,6 +171,9 @@ void blk_mq_dev_map_queues(struct blk_mq_queue_map *qmap,
 	const struct cpumask *mask;
 	unsigned int queue, cpu;
 
+	if (blk_mq_hk_map_queues(qmap))
+		return;
+
 	for (queue = 0; queue < qmap->nr_queues; queue++) {
 		mask = get_queue_affinity(dev_data, dev_off, queue);
 		if (!mask)
--
2.46.0