lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221124230505.073418677@linutronix.de>
Date:   Fri, 25 Nov 2022 00:25:45 +0100 (CET)
From:   Thomas Gleixner <tglx@...utronix.de>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, Joerg Roedel <joro@...tes.org>,
        Will Deacon <will@...nel.org>, linux-pci@...r.kernel.org,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        Lorenzo Pieralisi <lorenzo.pieralisi@....com>,
        Marc Zyngier <maz@...nel.org>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Jason Gunthorpe <jgg@...lanox.com>,
        Dave Jiang <dave.jiang@...el.com>,
        Alex Williamson <alex.williamson@...hat.com>,
        Kevin Tian <kevin.tian@...el.com>,
        Dan Williams <dan.j.williams@...el.com>,
        Logan Gunthorpe <logang@...tatee.com>,
        Ashok Raj <ashok.raj@...el.com>, Jon Mason <jdmason@...zu.us>,
        Allen Hubbe <allenbh@...il.com>
Subject: [patch V3 00/33] genirq, PCI/MSI: Support for per device MSI and
 PCI/IMS - Part 3 implementation

This is V3 of the third part of the effort to provide support for per device
MSI interrupt domains.

Version 2 of this part can be found here:

  https://lore.kernel.org/all/20221121083657.157152924@linutronix.de

This is based on the second part which is available here:

  https://lore.kernel.org/all/20221124225331.464480443@linutronix.de

and from git:

  git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git devmsi-v3-part2

This third part is available from git too:

  git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git devmsi-v3-part3

This part provides the actual per device domain implementation and related
functionality:

  1) Provide infrastructure to create and remove per device MSI domains

  2) Implement per device MSI domains in the PCI/MSI code and make
     them conditional on the availability of a suitable parent MSI
     domain. This allows to convert the existing domains one by one
     and keeps both the legacy and the current "global" PCI/MSI domain
     model working.

  3) Convert the related x86 MSI domains over (vector and remapping).

  4) Provide core infrastructure for dynamic allocations

  5) Provide PCI/MSI-X interfaces for device drivers to do post
     MSI-X enable allocation/free

  6) Enable dynamic allocation support on the x86 MSI parent domains

  7) Provide infrastructure to create PCI/IMS domains

  8) Enable IMS support on the x86 MSI parent domains

  9) Provide a driver for IDXD which demonstrates how IMS domains
     look like.

Changes vs. v2:

  - Rework the domain size initialization and handling (Kevin)

  - Enable IMS only when on real hardware (Kevin)

  - Rename the PCI/MSI irqchip functions (Kevin)
  
  - Update change logs and comments (Kevin)
   
The delta patch vs. V3 is attached below. It's not completely accurate as
it has some changes from part 2 intermingled, but you get the idea.

Thanks,

	tglx
---
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 1eb9f9eb4852..4d28967f910d 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3656,6 +3656,13 @@ static const struct msi_parent_ops amdvi_msi_parent_ops = {
 	.init_dev_msi_info	= msi_parent_init_dev_msi_info,
 };
 
+static const struct msi_parent_ops virt_amdvi_msi_parent_ops = {
+	.supported_flags	= X86_VECTOR_MSI_FLAGS_SUPPORTED |
+				  MSI_FLAG_MULTI_PCI_MSI,
+	.prefix			= "vIR-",
+	.init_dev_msi_info	= msi_parent_init_dev_msi_info,
+};
+
 int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 {
 	struct fwnode_handle *fn;
@@ -3672,7 +3679,11 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 
 	irq_domain_update_bus_token(iommu->ir_domain,  DOMAIN_BUS_AMDVI);
 	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
-	iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
+
+	if (amd_iommu_np_cache)
+		iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
+	else
+		iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
 
 	return 0;
 }
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f3ee93be9032..a723f53ba472 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -82,7 +82,7 @@ static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
 static int __init parse_ioapics_under_ir(void);
-static const struct msi_parent_ops dmar_msi_parent_ops;
+static const struct msi_parent_ops dmar_msi_parent_ops, virt_dmar_msi_parent_ops;
 
 static bool ir_pre_enabled(struct intel_iommu *iommu)
 {
@@ -577,7 +577,11 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 
 	irq_domain_update_bus_token(iommu->ir_domain,  DOMAIN_BUS_DMAR);
 	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
-	iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
+
+	if (cap_caching_mode(iommu->cap))
+		iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
+	else
+		iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
 
 	ir_table->base = page_address(pages);
 	ir_table->bitmap = bitmap;
@@ -1436,6 +1440,13 @@ static const struct msi_parent_ops dmar_msi_parent_ops = {
 	.init_dev_msi_info	= msi_parent_init_dev_msi_info,
 };
 
+static const struct msi_parent_ops virt_dmar_msi_parent_ops = {
+	.supported_flags	= X86_VECTOR_MSI_FLAGS_SUPPORTED |
+				  MSI_FLAG_MULTI_PCI_MSI,
+	.prefix			= "vIR-",
+	.init_dev_msi_info	= msi_parent_init_dev_msi_info,
+};
+
 /*
  * Support of Interrupt Remapping Unit Hotplug
  */
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 622f0fd5f829..9eec1ec19917 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -699,7 +699,7 @@ config PCI_INTEL_IDXD_IMS
 	tristate "Intel IDXD Interrupt Message Store controller"
 	depends on PCI_MSI
 	help
-	  Support for Intel IDXD IMS Interrupt Message Store controller
+	  Support for Intel IDXD Interrupt Message Store (IMS) controller
 	  with IMS slot storage in a slot array in device memory
 
 endmenu
diff --git a/drivers/irqchip/irq-pci-intel-idxd.c b/drivers/irqchip/irq-pci-intel-idxd.c
index 509450b08f47..d33c32787ad5 100644
--- a/drivers/irqchip/irq-pci-intel-idxd.c
+++ b/drivers/irqchip/irq-pci-intel-idxd.c
@@ -128,7 +128,7 @@ static const struct msi_domain_template idxd_ims_template = {
 /**
  * pci_intel_idxd_create_ims_domain - Create a IDXD IMS domain
  * @pdev:	IDXD PCI device to operate on
- * @slots:	Pointer to the mapped slot memory arrray
+ * @slots:	Pointer to the mapped slot memory array
  * @nr_slots:	The number of slots in the array
  *
  * Returns: True on success, false otherwise
diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c
index dfcaa77108de..d60585875009 100644
--- a/drivers/pci/msi/api.c
+++ b/drivers/pci/msi/api.c
@@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(pci_msix_can_alloc_dyn);
  * Return: A struct msi_map
  *
  *	On success msi_map::index contains the allocated index (>= 0) and
- *	msi_map::virq the allocated Linux interrupt number (> 0).
+ *	msi_map::virq contains the allocated Linux interrupt number (> 0).
  *
  *	On fail msi_map::index contains the error code and msi_map::virq
  *	is set to 0.
@@ -376,7 +376,7 @@ EXPORT_SYMBOL(pci_irq_get_affinity);
  * the index - if there is a hardware table - or in case of purely software
  * managed IMS implementation the association happens via the
  * irq_write_msi_msg() callback of the implementation specific interrupt
- * chip, which utilizes the provided @cookie to store the MSI message in
+ * chip, which utilizes the provided @icookie to store the MSI message in
  * the appropriate place.
  *
  * Return: A struct msi_map
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index ad235d1b0f35..687a986365f9 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -148,14 +148,14 @@ static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *d
 	arg->hwirq = desc->msi_index;
 }
 
-static void pci_mask_msi(struct irq_data *data)
+static void pci_irq_mask_msi(struct irq_data *data)
 {
 	struct msi_desc *desc = irq_data_get_msi_desc(data);
 
 	pci_msi_mask(desc, BIT(data->irq - desc->irq));
 }
 
-static void pci_unmask_msi(struct irq_data *data)
+static void pci_irq_unmask_msi(struct irq_data *data)
 {
 	struct msi_desc *desc = irq_data_get_msi_desc(data);
 
@@ -176,8 +176,8 @@ static void pci_unmask_msi(struct irq_data *data)
 static struct msi_domain_template pci_msi_template = {
 	.chip = {
 		.name			= "PCI-MSI",
-		.irq_mask		= pci_mask_msi,
-		.irq_unmask		= pci_unmask_msi,
+		.irq_mask		= pci_irq_mask_msi,
+		.irq_unmask		= pci_irq_unmask_msi,
 		.irq_write_msi_msg	= pci_msi_domain_write_msg,
 		.flags			= IRQCHIP_ONESHOT_SAFE,
 	},
@@ -192,12 +192,12 @@ static struct msi_domain_template pci_msi_template = {
 	},
 };
 
-static void pci_mask_msix(struct irq_data *data)
+static void pci_irq_mask_msix(struct irq_data *data)
 {
 	pci_msix_mask(irq_data_get_msi_desc(data));
 }
 
-static void pci_unmask_msix(struct irq_data *data)
+static void pci_irq_unmask_msix(struct irq_data *data)
 {
 	pci_msix_unmask(irq_data_get_msi_desc(data));
 }
@@ -213,8 +213,8 @@ static void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *a
 static struct msi_domain_template pci_msix_template = {
 	.chip = {
 		.name			= "PCI-MSIX",
-		.irq_mask		= pci_mask_msix,
-		.irq_unmask		= pci_unmask_msix,
+		.irq_mask		= pci_irq_mask_msix,
+		.irq_unmask		= pci_irq_unmask_msix,
 		.irq_write_msi_msg	= pci_msi_domain_write_msg,
 		.flags			= IRQCHIP_ONESHOT_SAFE,
 	},
@@ -302,7 +302,7 @@ bool pci_setup_msi_device_domain(struct pci_dev *pdev)
  */
 bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize)
 {
-	if (WARN_ON_ONCE(pdev->msix_enabled))
+	if (WARN_ON_ONCE(pdev->msi_enabled))
 		return false;
 
 	if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX))
diff --git a/include/linux/msi.h b/include/linux/msi.h
index f73d20ccd552..a112b913fff9 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -455,7 +463,10 @@ struct msi_domain_ops {
  * struct msi_domain_info - MSI interrupt domain data
  * @flags:		Flags to decribe features and capabilities
  * @bus_token:		The domain bus token
- * @hwsize:		The hardware table size (0 if unknown/unlimited)
+ * @hwsize:		The hardware table size or the software index limit.
+ *			If 0 then the size is considered unlimited and
+ *			gets initialized to the maximum software index limit
+ *			by the domain creation code.
  * @ops:		The callback data structure
  * @chip:		Optional: associated interrupt chip
  * @chip_data:		Optional: associated interrupt chip data
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 43bc0c6d66ec..c39f75e473ea 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -118,30 +85,22 @@ static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
 			   unsigned int domid, unsigned int index)
 {
 	struct msi_device_data *md = dev->msi.data;
+	struct xarray *xa = &md->__domains[domid].store;
 	unsigned int hwsize;
-	int baseidx, ret;
-
-	baseidx = msi_get_domain_base_index(dev, domid);
-	if (baseidx < 0) {
-		ret = baseidx;
-		goto fail;
-	}
+	int ret;
 
 	hwsize = msi_domain_get_hwsize(dev, domid);
 
 	if (index == MSI_ANY_INDEX) {
-		struct xa_limit limit;
+		struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
 		unsigned int index;
 
-		limit.min = baseidx;
-		limit.max = baseidx + hwsize - 1;
-
-		/* Let the xarray allocate a free index within the limits */
-		ret = xa_alloc(&md->__store, &index, desc, limit, GFP_KERNEL);
+		/* Let the xarray allocate a free index within the limit */
+		ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
 		if (ret)
 			goto fail;
 
-		desc->msi_index = index - baseidx;
+		desc->msi_index = index;
 		return 0;
 	} else {
 		if (index >= hwsize) {
@@ -150,8 +109,7 @@ static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
 		}
 
 		desc->msi_index = index;
-		index += baseidx;
-		ret = xa_insert(&md->__store, index, desc, GFP_KERNEL);
+		ret = xa_insert(xa, index, desc, GFP_KERNEL);
 		if (ret)
 			goto fail;
 		return 0;
@@ -313,10 +268,13 @@ EXPORT_SYMBOL_GPL(get_cached_msi_msg);
 static void msi_device_data_release(struct device *dev, void *res)
 {
 	struct msi_device_data *md = res;
+	int i;
 
-	msi_remove_device_irqdomains(dev, md);
-	WARN_ON_ONCE(!xa_empty(&md->__store));
-	xa_destroy(&md->__store);
+	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
+		msi_remove_device_irq_domain(dev, i);
+		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
+		xa_destroy(&md->__domains[i].store);
+	}
 	dev->msi.data = NULL;
 }
 
@@ -348,11 +306,19 @@ int msi_setup_device_data(struct device *dev)
 		return ret;
 	}
 
-	msi_setup_default_irqdomain(dev, md);
+	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
+		xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
+
+	/*
+	 * If @dev::msi::domain is set and is a global MSI domain, copy the
+	 * pointer into the domain array so all code can operate on domain
+	 * ids. The NULL pointer check is required to keep the legacy
+	 * architecture specific PCI/MSI support working.
+	 */
+	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
+		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
 
-	xa_init_flags(&md->__store, XA_FLAGS_ALLOC);
 	mutex_init(&md->mutex);
-	md->__iter_idx = MSI_XA_MAX_INDEX;
 	dev->msi.data = md;
 	devres_add(dev, md);
 	return 0;
@@ -631,7 +592,7 @@ static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int
 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
 		return NULL;
 
-	domain = dev->msi.data->__irqdomains[domid];
+	domain = dev->msi.data->__domains[domid].domain;
 	if (!domain)
 		return NULL;
 
@@ -646,18 +607,13 @@ static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid
 	struct msi_domain_info *info;
 	struct irq_domain *domain;
 
-	/*
-	 * Retrieve the MSI domain for range checking. If there is no
-	 * domain or the domain is not a per device domain, then assume
-	 * full MSI range and pray that the calling subsystem knows what it
-	 * is doing.
-	 */
 	domain = msi_get_device_domain(dev, domid);
-	if (domain && irq_domain_is_msi_device(domain)) {
+	if (domain) {
 		info = domain->host_data;
 		return info->hwsize;
 	}
-	return MSI_MAX_INDEX + 1;
+	/* No domain, no size... */
+	return 0;
 }
 
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
@@ -858,6 +814,17 @@ static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
 {
 	struct irq_domain *domain;
 
+	if (info->hwsize > MSI_XA_DOMAIN_SIZE)
+		return NULL;
+
+	/*
+	 * Hardware size 0 is valid for backwards compatibility and for
+	 * domains which are not backed by a hardware table. Grant the
+	 * maximum index space.
+	 */
+	if (!info->hwsize)
+		info->hwsize = MSI_XA_DOMAIN_SIZE;
+
 	msi_domain_update_dom_ops(info);
 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
 		msi_domain_update_chip_ops(info);
@@ -997,7 +964,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
 	if (!bundle)
 		return false;
 
-	bundle->info.hwsize = hwsize ? hwsize : MSI_MAX_INDEX;
+	bundle->info.hwsize = hwsize;
 	bundle->info.chip = &bundle->chip;
 	bundle->info.ops = &bundle->ops;
 	bundle->info.data = domain_data;
@@ -1028,7 +995,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
 		goto fail;
 
 	domain->dev = dev;
-	dev->msi.data->__irqdomains[domid] = domain;
+	dev->msi.data->__domains[domid].domain = domain;
 	msi_unlock_descs(dev);
 	return true;
 
@@ -1058,7 +1025,7 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
 	if (!domain || !irq_domain_is_msi_device(domain))
 		goto unlock;
 
-	dev->msi.data->__irqdomains[domid] = NULL;
+	dev->msi.data->__domains[domid].domain = NULL;
 	info = domain->host_data;
 	irq_domain_remove(domain);
 	kfree(container_of(info, struct msi_domain_template, info));
@@ -1112,9 +1079,10 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 		.last	= virq_base + nvec - 1,
 	};
 	struct msi_desc *desc;
+	struct xarray *xa;
 	int ret, virq;
 
-	if (!msi_ctrl_range_valid(dev, &ctrl))
+	if (!msi_ctrl_valid(dev, &ctrl))
 		return -EINVAL;
 
 	msi_lock_descs(dev);
@@ -1122,8 +1090,10 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 	if (ret)
 		goto unlock;
 
+	xa = &dev->msi.data->__domains[ctrl.domid].store;
+
 	for (virq = virq_base; virq < virq_base + nvec; virq++) {
-		desc = xa_load(&dev->msi.data->__store, virq);
+		desc = xa_load(xa, virq);
 		desc->irq = virq;
 
 		ops->set_desc(arg, desc);
@@ -1257,8 +1227,8 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
 				   struct msi_ctrl *ctrl)
 {
+	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
 	struct msi_domain_info *info = domain->host_data;
-	struct xarray *xa = &dev->msi.data->__store;
 	struct msi_domain_ops *ops = info->ops;
 	unsigned int vflags = 0, allocated = 0;
 	msi_alloc_info_t arg = { };
@@ -1347,7 +1317,7 @@ static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
 	struct irq_domain *domain;
 	int ret;
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
+	if (!msi_ctrl_valid(dev, ctrl))
 		return -EINVAL;
 
 	domain = msi_get_device_domain(dev, ctrl->domid);
@@ -1527,16 +1497,14 @@ struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, u
 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
 				   struct msi_ctrl *ctrl)
 {
+	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
 	struct msi_domain_info *info = domain->host_data;
-	struct xarray *xa = &dev->msi.data->__store;
 	struct irq_data *irqd;
 	struct msi_desc *desc;
 	unsigned long idx;
-	int i, base;
-
-	base = ctrl->domid * MSI_XA_DOMAIN_SIZE;
+	int i;
 
-	xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) {
+	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
 		/* Only handle MSI entries which have an interrupt associated */
 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
 			continue;
@@ -1561,7 +1529,7 @@ static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
 	struct msi_domain_ops *ops;
 	struct irq_domain *domain;
 
-	if (!msi_ctrl_range_valid(dev, ctrl))
+	if (!msi_ctrl_valid(dev, ctrl))
 		return;
 
 	domain = msi_get_device_domain(dev, ctrl->domid);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ