[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <32ce256a2ece5d63e99d5858f953586859818ffc.1760312725.git.nicolinc@nvidia.com>
Date: Sun, 12 Oct 2025 17:04:59 -0700
From: Nicolin Chen <nicolinc@...dia.com>
To: <joro@...tes.org>, <jgg@...dia.com>, <kevin.tian@...el.com>
CC: <suravee.suthikulpanit@....com>, <will@...nel.org>,
<robin.murphy@....com>, <sven@...nel.org>, <j@...nau.net>,
<jean-philippe@...aro.org>, <robin.clark@....qualcomm.com>,
<dwmw2@...radead.org>, <baolu.lu@...ux.intel.com>, <yong.wu@...iatek.com>,
<matthias.bgg@...il.com>, <angelogioacchino.delregno@...labora.com>,
<tjeznach@...osinc.com>, <pjw@...nel.org>, <palmer@...belt.com>,
<aou@...s.berkeley.edu>, <heiko@...ech.de>, <schnelle@...ux.ibm.com>,
<mjrosato@...ux.ibm.com>, <wens@...e.org>, <jernej.skrabec@...il.com>,
<samuel@...lland.org>, <thierry.reding@...il.com>, <jonathanh@...dia.com>,
<iommu@...ts.linux.dev>, <linux-kernel@...r.kernel.org>,
<asahi@...ts.linux.dev>, <linux-arm-kernel@...ts.infradead.org>,
<linux-arm-msm@...r.kernel.org>, <linux-mediatek@...ts.infradead.org>,
<linux-riscv@...ts.infradead.org>, <linux-rockchip@...ts.infradead.org>,
<linux-s390@...r.kernel.org>, <linux-sunxi@...ts.linux.dev>,
<linux-tegra@...r.kernel.org>, <virtualization@...ts.linux.dev>,
<patches@...ts.linux.dev>
Subject: [PATCH v1 02/20] iommu: Introduce a test_dev domain op and an internal helper
Add a new test_dev domain op for driver to test the compatibility between
a domain and a device at the driver level, before calling into the actual
attachment/replacement of a domain. Support pasid for set_dev_pasid call.
Move existing core-level compatibility tests to a helper function. Invoke
it prior to:
* __iommu_attach_device() or its wrapper __iommu_device_set_domain()
* __iommu_set_group_pasid()
And keep them within the group->mutex, so drivers can simply move all the
sanity and compatibility tests from their attach_dev callbacks to the new
test_dev callbacks without concerning about a race condition.
This may be a public API someday for VFIO/IOMMUFD to run a list of attach
tests without doing any actual attachment, which may result in a list of
failed tests. So encourage drivers to avoid printks to prevent kernel log
spam.
Suggested-by: Jason Gunthorpe <jgg@...dia.com>
Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
---
include/linux/iommu.h | 17 +++++--
drivers/iommu/iommu.c | 111 ++++++++++++++++++++++++++++++------------
2 files changed, 93 insertions(+), 35 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 801b2bd9e8d49..2ec99502dc29c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -714,7 +714,12 @@ struct iommu_ops {
/**
* struct iommu_domain_ops - domain specific operations
- * @attach_dev: attach an iommu domain to a device
+ * @test_dev: Test compatibility prior to an @attach_dev or @set_dev_pasid call.
+ * A driver-level callback of this op should do a thorough sanity, to
+ * make sure a device is compatible with the domain. So the following
+ * @attach_dev and @set_dev_pasid functions would likely succeed with
+ * only one exception due to a temporary failure like out of memory.
+ * It's suggested to avoid the kernel prints in this op.
* Return:
* * 0 - success
* * EINVAL - can indicate that device and domain are incompatible due to
@@ -722,11 +727,15 @@ struct iommu_ops {
* driver shouldn't log an error, since it is legitimate for a
* caller to test reuse of existing domains. Otherwise, it may
* still represent some other fundamental problem
- * * ENOMEM - out of memory
- * * ENOSPC - non-ENOMEM type of resource allocation failures
* * EBUSY - device is attached to a domain and cannot be changed
* * ENODEV - device specific errors, not able to be attached
* * <others> - treated as ENODEV by the caller. Use is discouraged
+ * @attach_dev: attach an iommu domain to a device
+ * Return:
+ * * 0 - success
+ * * ENOMEM - out of memory
+ * * ENOSPC - non-ENOMEM type of resource allocation failures
+ * * <others> - Use is discouraged
* @set_dev_pasid: set or replace an iommu domain to a pasid of device. The pasid of
* the device should be left in the old config in error case.
* @map_pages: map a physically contiguous set of pages of the same size to
@@ -751,6 +760,8 @@ struct iommu_ops {
* @free: Release the domain after use.
*/
struct iommu_domain_ops {
+ int (*test_dev)(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid, struct iommu_domain *old);
int (*attach_dev)(struct iommu_domain *domain, struct device *dev,
struct iommu_domain *old);
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 170e522b5bda4..95f0e2898b6b5 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -99,6 +99,9 @@ static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
+static int __iommu_domain_test_device(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev, struct iommu_domain *old);
static int __iommu_attach_group(struct iommu_domain *domain,
@@ -642,6 +645,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (group->default_domain)
iommu_create_device_direct_mappings(group->default_domain, dev);
if (group->domain) {
+ ret = __iommu_domain_test_device(group->domain, dev,
+ IOMMU_NO_PASID, NULL);
+ if (ret)
+ goto err_remove_gdev;
ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
0);
if (ret)
@@ -2185,6 +2192,8 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
{
+ int ret;
+
/*
* This is called on the dma mapping fast path so avoid locking. This is
* racy, but we have an expectation that the driver will setup its DMAs
@@ -2195,6 +2204,10 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
guard(mutex)(&dev->iommu_group->mutex);
+ ret = __iommu_domain_test_device(domain, dev, IOMMU_NO_PASID, NULL);
+ if (ret)
+ return ret;
+
return __iommu_attach_device(domain, dev, NULL);
}
@@ -2262,6 +2275,53 @@ static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
return false;
}
+/*
+ * Test if a future attach for a domain to the device will always fail. This may
+ * indicate that the device and the domain are incompatible in some way. Actual
+ * attach may still fail for some temporary failure like out of memory.
+ *
+ * If pasid != IOMMU_NO_PASID, it is meant to test a future set_dev_pasid call.
+ */
+static int __iommu_domain_test_device(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_group *group = dev->iommu_group;
+
+ lockdep_assert_held(&group->mutex);
+
+ if (!domain_iommu_ops_compatible(ops, domain))
+ return -EINVAL;
+
+ if (pasid != IOMMU_NO_PASID) {
+ struct group_device *gdev;
+
+ if (!domain->ops->set_dev_pasid || !ops->blocked_domain ||
+ !ops->blocked_domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ for_each_group_device(group, gdev) {
+ if (gdev->dev->iommu->max_pasids > 0 &&
+ pasid >= gdev->dev->iommu->max_pasids)
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Domains that don't implement a test_dev callback function must have a
+ * simple domain attach behavior. The sanity above should be enough.
+ */
+ if (!domain->ops->test_dev)
+ return 0;
+
+ return domain->ops->test_dev(domain, dev, pasid, old);
+}
+
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
@@ -2272,8 +2332,7 @@ static int __iommu_attach_group(struct iommu_domain *domain,
return -EBUSY;
dev = iommu_group_first_dev(group);
- if (!dev_has_iommu(dev) ||
- !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
+ if (!dev_has_iommu(dev))
return -EINVAL;
return __iommu_group_set_domain(group, domain);
@@ -2381,6 +2440,13 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
if (WARN_ON(!new_domain))
return -EINVAL;
+ for_each_group_device(group, gdev) {
+ ret = __iommu_domain_test_device(new_domain, gdev->dev,
+ IOMMU_NO_PASID, group->domain);
+ if (ret)
+ return ret;
+ }
+
/*
* Changing the domain is done by calling attach_dev() on the new
* domain. This switch does not have to be atomic and DMA can be
@@ -3479,38 +3545,19 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
- struct group_device *device;
- const struct iommu_ops *ops;
void *entry;
int ret;
if (!group)
return -ENODEV;
-
- ops = dev_iommu_ops(dev);
-
- if (!domain->ops->set_dev_pasid ||
- !ops->blocked_domain ||
- !ops->blocked_domain->ops->set_dev_pasid)
- return -EOPNOTSUPP;
-
- if (!domain_iommu_ops_compatible(ops, domain) ||
- pasid == IOMMU_NO_PASID)
+ if (pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
- for_each_group_device(group, device) {
- /*
- * Skip PASID validation for devices without PASID support
- * (max_pasids = 0). These devices cannot issue transactions
- * with PASID, so they don't affect group's PASID usage.
- */
- if ((device->dev->iommu->max_pasids > 0) &&
- (pasid >= device->dev->iommu->max_pasids)) {
- ret = -EINVAL;
- goto out_unlock;
- }
- }
+
+ ret = __iommu_domain_test_device(domain, dev, pasid, NULL);
+ if (ret)
+ goto out_unlock;
entry = iommu_make_pasid_array_entry(domain, handle);
@@ -3573,12 +3620,7 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
if (!group)
return -ENODEV;
-
- if (!domain->ops->set_dev_pasid)
- return -EOPNOTSUPP;
-
- if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
- pasid == IOMMU_NO_PASID || !handle)
+ if (pasid == IOMMU_NO_PASID || !handle)
return -EINVAL;
mutex_lock(&group->mutex);
@@ -3615,6 +3657,11 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
ret = 0;
if (curr_domain != domain) {
+ ret = __iommu_domain_test_device(domain, dev, pasid,
+ curr_domain);
+ if (ret)
+ goto out_unlock;
+
ret = __iommu_set_group_pasid(domain, group,
pasid, curr_domain);
if (ret)
--
2.43.0
Powered by blists - more mailing lists