[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250218135751.GH3696814@ziepe.ca>
Date: Tue, 18 Feb 2025 09:57:51 -0400
From: Jason Gunthorpe <jgg@...pe.ca>
To: Baolu Lu <baolu.lu@...ux.intel.com>
Cc: Zhangfei Gao <zhangfei.gao@...aro.org>, Joerg Roedel <joro@...tes.org>,
Will Deacon <will@...nel.org>, Robin Murphy <robin.murphy@....com>,
Kevin Tian <kevin.tian@...el.com>,
Fenghua Yu <fenghua.yu@...el.com>,
Dave Jiang <dave.jiang@...el.com>, Vinod Koul <vkoul@...nel.org>,
Zhou Wang <wangzhou1@...ilicon.com>, iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org,
Shameerali Kolothum Thodi <shameerali.kolothum.thodi@...wei.com>
Subject: Re: [PATCH 00/12] iommu: Remove IOMMU_DEV_FEAT_SVA/_IOPF
On Tue, Feb 18, 2025 at 10:57:30AM +0800, Baolu Lu wrote:
> > > > [ 304.961340] __iommu_attach_device+0x2c/0x110
> > > > [ 304.961343] __iommu_device_set_domain.isra.0+0x78/0xe0
> > > > [ 304.961345] __iommu_group_set_domain_internal+0x78/0x160
> > > > [ 304.961347] iommu_replace_group_handle+0x9c/0x150
> > > > [ 304.961350] iommufd_fault_domain_replace_dev+0x88/0x120
> > > > [ 304.961353] iommufd_device_do_replace+0x190/0x3c0
> > > > [ 304.961355] iommufd_device_change_pt+0x270/0x688
> > > > [ 304.961357] iommufd_device_replace+0x20/0x38
> > > > [ 304.961359] vfio_iommufd_physical_attach_ioas+0x30/0x78
> > > > [ 304.961363] vfio_df_ioctl_attach_pt+0xa8/0x188
> > > > [ 304.961366] vfio_device_fops_unl_ioctl+0x310/0x990
> > > >
> > > >
> > > > When page fault triggers:
> > > >
> > > > [ 1016.383578] ------------[ cut here ]-----------
> > > > [ 1016.388184] WARNING: CPU: 35 PID: 717 at
> > > > drivers/iommu/io-pgfault.c:231 iommu_report_device_fault+0x2c8/0x470
> > > It's likely that iopf_queue_add_device() was not called for this device.
> > iopf_queue_add_device is called, but quickly iopf_queue_remove_device
> > is called during guest bootup.
> > Then fault_param is set to NULL.
> >
> > arm_smmu_attach_commit
> > arm_smmu_remove_master_domain
> > // newly added in the first patch
> > if (master_domain) {
> > if (master_domain->using_iopf)
>
> It seems the above check is incorrect. We only need to disable iopf when
> an iopf-capable domain is about to be removed. Will the following
> additional change make any difference?
The check looks right, it should only disable if it was enabled? The
refcounting is what keep track of the 'about to be removed' and it
should be that using_iopf and domain->iopf_handler are mostly the
same.
Hmm, I think the issue is related to nested
to_smmu_domain_devices() returns the S2 parent for the nesting domain
always
Which means the smmu_domain->devices list (on the s2) will end up with
two entries for the same SID during the replace operation at VM boot,
one with faulting and one without.
I think that arm_smmu_remove_master_domain() will end up removing the
wrong master_domain because arm_smmu_find_master_domain() can't tell
the two apart.
When I wrote this there was no nested and the list devices list was
unique to each domain, so everything inside was the same.
Like below?
Jason
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b14f1d0ee7076b..dc8708b414468e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2710,10 +2710,9 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
pci_disable_pasid(pdev);
}
-static struct arm_smmu_master_domain *
-arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
- ioasid_t ssid, bool nested_ats_flush)
+static struct arm_smmu_master_domain *arm_smmu_find_master_domain(
+ struct arm_smmu_domain *smmu_domain, struct iommu_domain *domain,
+ struct arm_smmu_master *master, ioasid_t ssid, bool nested_ats_flush)
{
struct arm_smmu_master_domain *master_domain;
@@ -2722,6 +2721,7 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
list_for_each_entry(master_domain, &smmu_domain->devices,
devices_elm) {
if (master_domain->master == master &&
+ master_domain->domain == domain &&
master_domain->ssid == ssid &&
master_domain->nested_ats_flush == nested_ats_flush)
return master_domain;
@@ -2812,8 +2812,8 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid,
- nested_ats_flush);
+ master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
+ ssid, nested_ats_flush);
if (master_domain) {
list_del(&master_domain->devices_elm);
if (master->ats_enabled)
@@ -2889,6 +2889,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
if (!master_domain)
return -ENOMEM;
+ master_domain->domain = new_domain;
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 5653d7417db7d9..fe6b88affa4a60 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -907,6 +907,11 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master_domain {
struct list_head devices_elm;
struct arm_smmu_master *master;
+ /*
+ * For nested domains the master_domain is threaded onto the S2 parent,
+ * this points to the IOMMU_DOMAIN_NESTED to disambiguate the masters.
+ */
+ struct iommu_domain *domain;
ioasid_t ssid;
bool nested_ats_flush : 1;
bool using_iopf : 1;
Powered by blists - more mailing lists