Message-ID: <1579522957-4393-8-git-send-email-john.garry@huawei.com>
Date:   Mon, 20 Jan 2020 20:22:37 +0800
From:   John Garry <john.garry@...wei.com>
To:     <jejb@...ux.vnet.ibm.com>, <martin.petersen@...cle.com>
CC:     <linux-scsi@...r.kernel.org>, <linuxarm@...wei.com>,
        <linux-kernel@...r.kernel.org>, John Garry <john.garry@...wei.com>
Subject: [PATCH 7/7] scsi: hisi_sas: Use reply map for v2 hw

With the aim of boosting performance, switch v2 hw to use a reply map, i.e.
pseudo blk-mq.

This really helps when we're running a distro with irqbalance enabled. The
reason is that irqbalance can fiddle with the completion queue interrupt
affinity, such that we get random CPUs servicing the CQs. It's not much
better without irqbalance, where it is always cpu0.
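
For context, a simplified sketch of how the shared driver code would consume
such a per-CPU reply map on the submission path; the helper below is
illustrative only and not part of this patch, though the reply_map and dq
fields follow the existing hisi_sas structures:

/*
 * Illustrative sketch (not part of this patch): pick the delivery queue
 * for the submitting CPU from the reply map built by
 * setup_reply_map_v2_hw(). Without a reply map (parameter off), fall
 * back to the device's default queue as before.
 */
static struct hisi_sas_dq *select_dq_sketch(struct hisi_hba *hisi_hba,
					    struct hisi_sas_device *sas_dev)
{
	if (hisi_hba->reply_map) {
		int cpu = raw_smp_processor_id();
		unsigned int dq_index = hisi_hba->reply_map[cpu];

		return &hisi_hba->dq[dq_index];
	}

	return sas_dev->dq;
}

With the matching CQ interrupt pinned to the same CPUs (IRQF_NOBALANCING plus
an explicit irq_set_affinity() below), completions are then serviced on the
CPUs that issued the I/O rather than wherever irqbalance last placed the
interrupt.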

Distro read fio performance for 6x SAS SSDs goes from ~450K IOPS ->
650K IOPS.

To continue supporting manual setting of the irq affinity, introduce the
module parameter auto_affine_interrupts, which defaults to off. When it is
off, the CQ interrupts are requested without IRQF_NOBALANCING, so their
affinity can still be set from userspace as before.
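
As an aside for anyone testing the new behaviour, a debug-only sketch
(helper name hypothetical, not part of this patch) that prints which CPUs
end up servicing each completion queue, using the affinity_masks filled in
by interrupt_init_v2_hw():

static void dump_reply_map_v2_hw(struct hisi_hba *hisi_hba)
{
	struct device *dev = hisi_hba->dev;
	int queue;

	/* One line per CQ, e.g. "CQ0 serviced by CPUs 0-3" */
	for (queue = 0; queue < hisi_hba->queue_count; queue++)
		dev_dbg(dev, "CQ%d serviced by CPUs %*pbl\n", queue,
			cpumask_pr_args(&hisi_hba->affinity_masks[queue].mask));
}

The parameter itself is read-only at runtime (0444), so it can only be set
at module load time; the current value can be read back from
/sys/module/<module>/parameters/auto_affine_interrupts.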

Signed-off-by: John Garry <john.garry@...wei.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  3 ++
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 54 +++++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index 2bdd64648ef0..0612fc929a2d 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -10,6 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/debugfs.h>
+#include <linux/device.h>
 #include <linux/dmapool.h>
 #include <linux/iopoll.h>
 #include <linux/lcm.h>
@@ -385,6 +386,8 @@ struct hisi_hba {
 	spinlock_t lock;
 	struct semaphore sem;
 
+	struct irq_affinity_desc affinity_masks[HISI_SAS_MAX_QUEUES];
+
 	struct timer_list timer;
 	struct workqueue_struct *wq;
 
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index e05faf315dcd..533515e85f64 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -417,6 +417,11 @@ struct sig_atten_lu_s {
 	u32 sas_phy_ctrl;
 };
 
+static bool auto_affine_interrupts;
+module_param(auto_affine_interrupts, bool, 0444);
+MODULE_PARM_DESC(auto_affine_interrupts, "Enable auto-affinity of completion queue interrupts:\n"
+		 "default is off");
+
 static const struct hisi_sas_hw_error one_bit_ecc_errors[] = {
 	{
 		.irq_msk = BIT(SAS_ECC_INTR_DQE_ECC_1B_OFF),
@@ -3304,6 +3309,24 @@ static irq_handler_t fatal_interrupts[HISI_SAS_FATAL_INT_NR] = {
 	fatal_axi_int_v2_hw
 };
 
+static void setup_reply_map_v2_hw(struct hisi_hba *hisi_hba)
+{
+	int queue, cpu;
+
+	for (queue = 0; queue < hisi_hba->queue_count; queue++) {
+		struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
+		struct irq_affinity_desc *mask;
+		struct cpumask *cpumask;
+
+		mask = &hisi_hba->affinity_masks[queue];
+		cpumask = &mask->mask;
+
+		cq->irq_mask = cpumask;
+		for_each_cpu(cpu, cpumask)
+			hisi_hba->reply_map[cpu] = queue;
+	}
+}
+
 /**
  * There is a limitation in the hip06 chipset that we need
  * to map in all mbigen interrupts, even if they are not used.
@@ -3315,6 +3338,27 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 	int irq, rc = 0, irq_map[128];
 	int i, phy_no, fatal_no, queue_no;
 
+	if (auto_affine_interrupts) {
+		struct irq_affinity_desc *masks;
+		struct irq_affinity desc = {};
+		int nvec;
+
+		nvec = hisi_hba->queue_count;
+		masks = irq_create_affinity_masks(nvec, &desc);
+		if (!masks)
+			return -ENOMEM;
+
+		memcpy(hisi_hba->affinity_masks, masks, nvec * sizeof(*masks));
+		kfree(masks);
+
+		hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
+						   sizeof(unsigned int),
+						   GFP_KERNEL);
+		if (!hisi_hba->reply_map)
+			return -ENOMEM;
+		setup_reply_map_v2_hw(hisi_hba);
+	}
+
 	for (i = 0; i < 128; i++)
 		irq_map[i] = platform_get_irq(pdev, i);
 
@@ -3358,11 +3402,16 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 
 	for (queue_no = 0; queue_no < hisi_hba->queue_count; queue_no++) {
 		struct hisi_sas_cq *cq = &hisi_hba->cq[queue_no];
+		unsigned long irqflags = IRQF_ONESHOT;
+
+		if (auto_affine_interrupts)
+			irqflags |= IRQF_NOBALANCING;
 
 		cq->irq_no = irq_map[queue_no + 96];
+
 		rc = devm_request_threaded_irq(dev, cq->irq_no,
 					       cq_interrupt_v2_hw,
-					       cq_thread_v2_hw, IRQF_ONESHOT,
+					       cq_thread_v2_hw, irqflags,
 					       DRV_NAME " cq", cq);
 		if (rc) {
 			dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n",
@@ -3370,6 +3419,9 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 			rc = -ENOENT;
 			goto err_out;
 		}
+
+		if (auto_affine_interrupts)
+			irq_set_affinity(cq->irq_no, cq->irq_mask);
 	}
 
 	hisi_hba->cq_nvecs = hisi_hba->queue_count;
-- 
2.17.1
