Message-ID: <20250920203851.2205115-28-ajones@ventanamicro.com>
Date: Sat, 20 Sep 2025 15:38:58 -0500
From: Andrew Jones <ajones@...tanamicro.com>
To: iommu@...ts.linux.dev,
	kvm-riscv@...ts.infradead.org,
	kvm@...r.kernel.org,
	linux-riscv@...ts.infradead.org,
	linux-kernel@...r.kernel.org
Cc: jgg@...dia.com,
	zong.li@...ive.com,
	tjeznach@...osinc.com,
	joro@...tes.org,
	will@...nel.org,
	robin.murphy@....com,
	anup@...infault.org,
	atish.patra@...ux.dev,
	tglx@...utronix.de,
	alex.williamson@...hat.com,
	paul.walmsley@...ive.com,
	palmer@...belt.com,
	alex@...ti.fr
Subject: [RFC PATCH v2 08/18] iommu/riscv: Use MSI table to enable IMSIC access

When setting IRQ affinity, extract the IMSIC address the device
needs to access and add it to the MSI table. If the device no
longer needs access to an IMSIC, remove it from the table to
prohibit access. This allows isolating device MSIs to a set of
harts, so we can now add the IRQ_DOMAIN_FLAG_ISOLATED_MSI IRQ
domain flag.

Signed-off-by: Andrew Jones <ajones@...tanamicro.com>
---
 drivers/iommu/riscv/iommu-ir.c | 143 ++++++++++++++++++++++++++++++++-
 drivers/iommu/riscv/iommu.h    |   2 +
 2 files changed, 143 insertions(+), 2 deletions(-)
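
A note on the refcounting (for this mail only, not the commit message):
an MSI PTE is shared by every IRQ currently targeting that IMSIC, so the
PTE is only written and invalidated on the 0 -> 1 transition of its count
and only cleared and invalidated on the 1 -> 0 transition. The stand-alone
sketch below is only meant to illustrate that ordering; all names are
hypothetical, a plain mutex stands in for the domain's raw spinlock, and
it does not model the lock-free refcount_inc_not_zero() fast path used in
the patch when the count is already nonzero.

/*
 * Stand-alone model of the per-PTE refcounting (hypothetical names; C
 * ints stand in for refcount_t, a mutex stands in for the raw spinlock).
 * Only the first mapper writes the PTE and only the last unmapper clears
 * it; intermediate changes just adjust the count.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NR_PTES 4

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t pte[NR_PTES];        /* models domain->msi_root[]       */
static int pte_refs[NR_PTES];        /* models domain->msi_pte_counts[] */

static void map_pte(size_t idx, uint64_t addr)
{
	pthread_mutex_lock(&table_lock);
	if (pte_refs[idx]++ == 0) {
		pte[idx] = addr | 1;         /* "write PTE, set V bit" */
		printf("idx %zu: mapped, invalidate\n", idx);
	}
	pthread_mutex_unlock(&table_lock);
}

static void unmap_pte(size_t idx)
{
	pthread_mutex_lock(&table_lock);
	if (--pte_refs[idx] == 0) {
		pte[idx] = 0;                /* "clear PTE" */
		printf("idx %zu: unmapped, invalidate\n", idx);
	}
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	/* Two IRQs target the same IMSIC: one PTE write, one clear. */
	map_pte(2, 0x28000000);
	map_pte(2, 0x28000000);
	unmap_pte(2);
	unmap_pte(2);
	return 0;
}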

diff --git a/drivers/iommu/riscv/iommu-ir.c b/drivers/iommu/riscv/iommu-ir.c
index 290d91a6c6cd..b97768cac4be 100644
--- a/drivers/iommu/riscv/iommu-ir.c
+++ b/drivers/iommu/riscv/iommu-ir.c
@@ -4,6 +4,7 @@
  *
  * Copyright © 2025 Ventana Micro Systems Inc.
  */
+#include <linux/cleanup.h>
 #include <linux/irqchip/riscv-imsic.h>
 #include <linux/irqdomain.h>
 #include <linux/msi.h>
@@ -106,6 +107,20 @@ static size_t riscv_iommu_ir_nr_msiptes(struct riscv_iommu_domain *domain)
 	return max_idx + 1;
 }
 
+static void riscv_iommu_ir_set_pte(struct riscv_iommu_msipte *pte, u64 addr)
+{
+	pte->pte = FIELD_PREP(RISCV_IOMMU_MSIPTE_M, 3) |
+		   riscv_iommu_phys_to_ppn(addr) |
+		   FIELD_PREP(RISCV_IOMMU_MSIPTE_V, 1);
+	pte->mrif_info = 0;
+}
+
+static void riscv_iommu_ir_clear_pte(struct riscv_iommu_msipte *pte)
+{
+	pte->pte = 0;
+	pte->mrif_info = 0;
+}
+
 static void riscv_iommu_ir_msitbl_inval(struct riscv_iommu_domain *domain,
 					struct riscv_iommu_msipte *pte)
 {
@@ -149,19 +164,99 @@ static void riscv_iommu_ir_msitbl_inval(struct riscv_iommu_domain *domain,
 	rcu_read_unlock();
 }
 
+static void riscv_iommu_ir_msitbl_map(struct riscv_iommu_domain *domain, size_t idx,
+				      phys_addr_t addr)
+{
+	struct riscv_iommu_msipte *pte;
+
+	if (!domain->msi_root)
+		return;
+
+	if (!refcount_inc_not_zero(&domain->msi_pte_counts[idx])) {
+		scoped_guard(raw_spinlock_irqsave, &domain->msi_lock) {
+			if (refcount_read(&domain->msi_pte_counts[idx]) == 0) {
+				pte = &domain->msi_root[idx];
+				riscv_iommu_ir_set_pte(pte, addr);
+				riscv_iommu_ir_msitbl_inval(domain, pte);
+				refcount_set(&domain->msi_pte_counts[idx], 1);
+			} else {
+				refcount_inc(&domain->msi_pte_counts[idx]);
+			}
+		}
+	}
+}
+
+static void riscv_iommu_ir_msitbl_unmap(struct riscv_iommu_domain *domain, size_t idx)
+{
+	struct riscv_iommu_msipte *pte;
+
+	if (!domain->msi_root)
+		return;
+
+	scoped_guard(raw_spinlock_irqsave, &domain->msi_lock) {
+		if (refcount_dec_and_test(&domain->msi_pte_counts[idx])) {
+			pte = &domain->msi_root[idx];
+			riscv_iommu_ir_clear_pte(pte);
+			riscv_iommu_ir_msitbl_inval(domain, pte);
+		}
+	}
+}
+
+static size_t riscv_iommu_ir_get_msipte_idx_from_target(struct riscv_iommu_domain *domain,
+							struct irq_data *data, phys_addr_t *addr)
+{
+	struct msi_msg msg;
+
+	BUG_ON(irq_chip_compose_msi_msg(data, &msg));
+
+	*addr = ((phys_addr_t)msg.address_hi << 32) | msg.address_lo;
+
+	return riscv_iommu_ir_compute_msipte_idx(domain, *addr);
+}
+
+static int riscv_iommu_ir_irq_set_affinity(struct irq_data *data,
+					   const struct cpumask *dest, bool force)
+{
+	struct riscv_iommu_info *info = data->domain->host_data;
+	struct riscv_iommu_domain *domain = info->domain;
+	phys_addr_t old_addr, new_addr;
+	size_t old_idx, new_idx;
+	int ret;
+
+	old_idx = riscv_iommu_ir_get_msipte_idx_from_target(domain, data, &old_addr);
+
+	ret = irq_chip_set_affinity_parent(data, dest, force);
+	if (ret < 0)
+		return ret;
+
+	new_idx = riscv_iommu_ir_get_msipte_idx_from_target(domain, data, &new_addr);
+
+	if (new_idx == old_idx)
+		return ret;
+
+	riscv_iommu_ir_msitbl_unmap(domain, old_idx);
+	riscv_iommu_ir_msitbl_map(domain, new_idx, new_addr);
+
+	return ret;
+}
+
 static struct irq_chip riscv_iommu_ir_irq_chip = {
 	.name			= "IOMMU-IR",
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_mask		= irq_chip_mask_parent,
 	.irq_unmask		= irq_chip_unmask_parent,
-	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_set_affinity	= riscv_iommu_ir_irq_set_affinity,
 };
 
 static int riscv_iommu_ir_irq_domain_alloc_irqs(struct irq_domain *irqdomain,
 						unsigned int irq_base, unsigned int nr_irqs,
 						void *arg)
 {
+	struct riscv_iommu_info *info = irqdomain->host_data;
+	struct riscv_iommu_domain *domain = info->domain;
 	struct irq_data *data;
+	phys_addr_t addr;
+	size_t idx;
 	int i, ret;
 
 	ret = irq_domain_alloc_irqs_parent(irqdomain, irq_base, nr_irqs, arg);
@@ -171,14 +266,36 @@ static int riscv_iommu_ir_irq_domain_alloc_irqs(struct irq_domain *irqdomain,
 	for (i = 0; i < nr_irqs; i++) {
 		data = irq_domain_get_irq_data(irqdomain, irq_base + i);
 		data->chip = &riscv_iommu_ir_irq_chip;
+		idx = riscv_iommu_ir_get_msipte_idx_from_target(domain, data, &addr);
+		riscv_iommu_ir_msitbl_map(domain, idx, addr);
 	}
 
 	return 0;
 }
 
+static void riscv_iommu_ir_irq_domain_free_irqs(struct irq_domain *irqdomain,
+						unsigned int irq_base,
+						unsigned int nr_irqs)
+{
+	struct riscv_iommu_info *info = irqdomain->host_data;
+	struct riscv_iommu_domain *domain = info->domain;
+	struct irq_data *data;
+	phys_addr_t addr;
+	size_t idx;
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		data = irq_domain_get_irq_data(irqdomain, irq_base + i);
+		idx = riscv_iommu_ir_get_msipte_idx_from_target(domain, data, &addr);
+		riscv_iommu_ir_msitbl_unmap(domain, idx);
+	}
+
+	irq_domain_free_irqs_parent(irqdomain, irq_base, nr_irqs);
+}
+
 static const struct irq_domain_ops riscv_iommu_ir_irq_domain_ops = {
 	.alloc = riscv_iommu_ir_irq_domain_alloc_irqs,
-	.free = irq_domain_free_irqs_parent,
+	.free = riscv_iommu_ir_irq_domain_free_irqs,
 };
 
 static const struct msi_parent_ops riscv_iommu_ir_msi_parent_ops = {
@@ -221,6 +338,19 @@ struct irq_domain *riscv_iommu_ir_irq_domain_create(struct riscv_iommu_device *i
 		return NULL;
 	}
 
+	if (iommu->caps & RISCV_IOMMU_CAPABILITIES_MSI_FLAT) {
+		/*
+		 * NOTE: The RISC-V IOMMU doesn't actually support isolated MSI because
+		 * there is no MSI message validation (see the comment above
+		 * msi_device_has_isolated_msi()). However, we claim isolated MSI here
+		 * because applying the IOMMU ensures MSI messages may only be delivered
+		 * to the mapped MSI addresses. This allows MSIs to be isolated to
+		 * particular harts/vcpus where the unvalidated MSI messages can be
+		 * tolerated.
+		 */
+		irqdomain->flags |= IRQ_DOMAIN_FLAG_ISOLATED_MSI;
+	}
+
 	irqdomain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
 	irqdomain->msi_parent_ops = &riscv_iommu_ir_msi_parent_ops;
 	irq_domain_update_bus_token(irqdomain, DOMAIN_BUS_MSI_REMAP);
@@ -233,6 +363,7 @@ struct irq_domain *riscv_iommu_ir_irq_domain_create(struct riscv_iommu_device *i
 static void riscv_iommu_ir_free_msi_table(struct riscv_iommu_domain *domain)
 {
 	iommu_free_pages(domain->msi_root);
+	kfree(domain->msi_pte_counts);
 }
 
 void riscv_iommu_ir_irq_domain_remove(struct riscv_iommu_info *info)
@@ -274,6 +405,14 @@ static int riscv_ir_set_imsic_global_config(struct riscv_iommu_device *iommu,
 							     nr_ptes * sizeof(*domain->msi_root));
 		if (!domain->msi_root)
 			return -ENOMEM;
+
+		domain->msi_pte_counts = kcalloc(nr_ptes, sizeof(refcount_t), GFP_KERNEL_ACCOUNT);
+		if (!domain->msi_pte_counts) {
+			iommu_free_pages(domain->msi_root);
+			return -ENOMEM;
+		}
+
+		raw_spin_lock_init(&domain->msi_lock);
 	}
 
 	return 0;
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index 1fe35f1210fb..aeb5642f003c 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -34,6 +34,8 @@ struct riscv_iommu_domain {
 	unsigned int pgd_mode;
 	unsigned long *pgd_root;
 	struct riscv_iommu_msipte *msi_root;
+	refcount_t *msi_pte_counts;
+	raw_spinlock_t msi_lock;
 	u64 msi_addr_mask;
 	u64 msi_addr_pattern;
 	u32 group_index_bits;
-- 
2.49.0

