Message-ID: <20250115083215.2781310-1-danielsftsai@google.com>
Date: Wed, 15 Jan 2025 08:32:15 +0000
From: Daniel Tsai <danielsftsai@...gle.com>
To: Jingoo Han <jingoohan1@...il.com>, 
	Manivannan Sadhasivam <manivannan.sadhasivam@...aro.org>
Cc: Lorenzo Pieralisi <lpieralisi@...nel.org>, 
	"Krzysztof WilczyƄski" <kw@...ux.com>, Rob Herring <robh@...nel.org>, Bjorn Helgaas <bhelgaas@...gle.com>, 
	linux-pci@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Tsai Sung-Fu <danielsftsai@...gle.com>
Subject: [PATCH] PCI: dwc: Separate MSI vectors out to different controllers

From: Tsai Sung-Fu <danielsftsai@...gle.com>

Set up a struct irq_affinity on the endpoint (EP) side and pass it as a
parameter when the endpoint calls pci_alloc_irq_vectors_affinity().
This establishes a non-default IRQ affinity for the target IRQs (ending
up in irq_desc->irq_common_data.affinity), which the host driver can
then use to spread MSI vectors across different MSI controllers.
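
As an illustration only, a minimal endpoint-side sketch (hypothetical
driver code, not part of this patch) requesting spread affinity could
look like:

  #include <linux/interrupt.h>
  #include <linux/pci.h>

  static int example_setup_irqs(struct pci_dev *pdev)
  {
  	struct irq_affinity affd = { };	/* default spreading, no pre/post vectors */
  	int nvecs;

  	/* Request up to 8 MSI vectors, spread across online CPUs */
  	nvecs = pci_alloc_irq_vectors_affinity(pdev, 1, 8,
  					       PCI_IRQ_MSI | PCI_IRQ_AFFINITY,
  					       &affd);
  	if (nvecs < 0)
  		return nvecs;

  	return 0;
  }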

In the current design there are 8 MSI controllers, each owning up to
32 MSI vectors, laid out as below:

msi_controller0 <- msi_vector0 ~ 31
msi_controller1 <- msi_vector32 ~ 63
msi_controller2 <- msi_vector64 ~ 95
.
.
.
msi_controller7 <- msi_vector224 ~ 255
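
The mapping from a hwirq to its controller in this layout is
effectively the following (shown only for illustration;
MAX_MSI_IRQS_PER_CTRL is 32 in pcie-designware.h):

  ctrl = hwirq / MAX_MSI_IRQS_PER_CTRL;	/* msi_controller 0..7 */
  bit  = hwirq % MAX_MSI_IRQS_PER_CTRL;	/* bit within that controller */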

dw_pcie_irq_domain_alloc() allocates MSI vector numbers contiguously,
so all vectors of a request end up handled by the same MSI controller
and therefore share the same IRQ affinity. To spread them across
different CPUs, the MSI vectors must be distributed across different
MSI controllers, which requires allocating them in a strided fashion.
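
For example, if an endpoint requests 4 vectors with 4 distinct affinity
masks, contiguous allocation assigns hwirqs 0-3, all on msi_controller0,
so all 4 vectors share one parent interrupt and one effective affinity.
A strided allocation can instead assign hwirqs 0, 32, 64 and 96, one per
controller, letting each vector follow its own mask.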

To do that, this patch uses the affinity cpumask set up by the endpoint
and compares it against the mask recorded for each MSI controller: if
the affinities match, the vector is bound to the controller that the
previously allocated vectors went to; if not, it is assigned to a fresh
MSI controller.

Signed-off-by: Tsai Sung-Fu <danielsftsai@...gle.com>
---
 .../pci/controller/dwc/pcie-designware-host.c | 80 +++++++++++++++----
 drivers/pci/controller/dwc/pcie-designware.h  |  2 +
 2 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index d2291c3ceb8be..192d05c473b3b 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -181,25 +181,75 @@ static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
 				    void *args)
 {
 	struct dw_pcie_rp *pp = domain->host_data;
-	unsigned long flags;
-	u32 i;
-	int bit;
+	const struct cpumask *mask;
+	unsigned long flags, index, start, size;
+	int irq, ctrl, p_irq, *msi_vec_index;
+	unsigned int controller_count = (pp->num_vectors / MAX_MSI_IRQS_PER_CTRL);
+
+	/*
+	 * All IRQs on a given controller use the same parent interrupt and
+	 * therefore the same CPU affinity. Try to honor CPU spreading
+	 * requests by assigning vectors with distinct affinity masks to
+	 * distinct controllers. Instead of always allocating MSI vectors
+	 * contiguously, the first vector placed on a controller binds that
+	 * controller to its affinity mask; subsequent vectors are placed
+	 * on a controller whose recorded mask matches their own.
+	 */
+
+	msi_vec_index = kcalloc(nr_irqs, sizeof(*msi_vec_index), GFP_KERNEL);
+	if (!msi_vec_index)
+		return -ENOMEM;
 
 	raw_spin_lock_irqsave(&pp->lock, flags);
 
-	bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
-				      order_base_2(nr_irqs));
+	for (irq = 0; irq < nr_irqs; irq++) {
+		mask = irq_get_affinity_mask(virq + irq);
+		for (ctrl = 0; ctrl < controller_count; ctrl++) {
+			start = ctrl * MAX_MSI_IRQS_PER_CTRL;
+			size = start + MAX_MSI_IRQS_PER_CTRL;
+			if (find_next_bit(pp->msi_irq_in_use, size, start) >= size) {
+				cpumask_copy(&pp->msi_ctrl_to_cpu[ctrl], mask);
+				break;
+			}
 
-	raw_spin_unlock_irqrestore(&pp->lock, flags);
+			if (cpumask_equal(&pp->msi_ctrl_to_cpu[ctrl], mask) &&
+			    find_next_zero_bit(pp->msi_irq_in_use, size, start) < size)
+				break;
+		}
 
-	if (bit < 0)
-		return -ENOSPC;
+		/*
+		 * No MSI controller matches: return -ENOSPC and clear the
+		 * bits set for the vectors allocated so far, since none of
+		 * them was successfully allocated. Leaving them set would
+		 * block bitmap positions other endpoints could still use.
+		 * This rollback is the extra step required when not
+		 * allocating contiguously.
+		 */
+		if (ctrl == controller_count) {
+			for (p_irq = irq - 1; p_irq >= 0; p_irq--)
+				bitmap_clear(pp->msi_irq_in_use, msi_vec_index[p_irq], 1);
+			raw_spin_unlock_irqrestore(&pp->lock, flags);
+			kfree(msi_vec_index);
+			return -ENOSPC;
+		}
+
+		index = bitmap_find_next_zero_area(pp->msi_irq_in_use,
+						   size,
+						   start,
+						   1,
+						   0);
+		bitmap_set(pp->msi_irq_in_use, index, 1);
+		msi_vec_index[irq] = index;
+	}
 
-	for (i = 0; i < nr_irqs; i++)
-		irq_domain_set_info(domain, virq + i, bit + i,
+	raw_spin_unlock_irqrestore(&pp->lock, flags);
+
+	for (irq = 0; irq < nr_irqs; irq++)
+		irq_domain_set_info(domain, virq + irq, msi_vec_index[irq],
 				    pp->msi_irq_chip,
 				    pp, handle_edge_irq,
 				    NULL, NULL);
+	kfree(msi_vec_index);
 
 	return 0;
 }
@@ -207,15 +257,15 @@ static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
 static void dw_pcie_irq_domain_free(struct irq_domain *domain,
 				    unsigned int virq, unsigned int nr_irqs)
 {
-	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct irq_data *d;
 	struct dw_pcie_rp *pp = domain->host_data;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&pp->lock, flags);
-
-	bitmap_release_region(pp->msi_irq_in_use, d->hwirq,
-			      order_base_2(nr_irqs));
-
+	for (int i = 0; i < nr_irqs; i++) {
+		d = irq_domain_get_irq_data(domain, virq + i);
+		bitmap_clear(pp->msi_irq_in_use, d->hwirq, 1);
+	}
 	raw_spin_unlock_irqrestore(&pp->lock, flags);
 }
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 347ab74ac35aa..95629b37a238e 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -14,6 +14,7 @@
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/cpumask.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma/edma.h>
 #include <linux/gpio/consumer.h>
@@ -373,6 +374,7 @@ struct dw_pcie_rp {
 	struct irq_chip		*msi_irq_chip;
 	u32			num_vectors;
 	u32			irq_mask[MAX_MSI_CTRLS];
+	struct cpumask		msi_ctrl_to_cpu[MAX_MSI_CTRLS];
 	struct pci_host_bridge  *bridge;
 	raw_spinlock_t		lock;
 	DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS);
-- 
2.48.0.rc2.279.g1de40edade-goog

