Message-Id: <20240229094613.121575-3-baolu.lu@linux.intel.com>
Date: Thu, 29 Feb 2024 17:46:13 +0800
From: Lu Baolu <baolu.lu@...ux.intel.com>
To: Joerg Roedel <joro@...tes.org>,
Will Deacon <will@...nel.org>,
Robin Murphy <robin.murphy@....com>,
Jason Gunthorpe <jgg@...pe.ca>,
Kevin Tian <kevin.tian@...el.com>,
Eric Badger <ebadger@...estorage.com>
Cc: iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org,
Lu Baolu <baolu.lu@...ux.intel.com>
Subject: [PATCH v2 2/2] iommu/vt-d: Fix NULL domain on device release
In the kdump kernel, the IOMMU operates in deferred_attach mode, so
info->domain may not yet be assigned by the time the release_device
callback runs. This leads to the following NULL pointer dereference:
BUG: kernel NULL pointer dereference, address: 000000000000003c
...
RIP: 0010:do_raw_spin_lock+0xa/0xa0
...
_raw_spin_lock_irqsave+0x1b/0x30
intel_iommu_release_device+0x96/0x170
iommu_deinit_device+0x39/0xf0
__iommu_group_remove_device+0xa0/0xd0
iommu_bus_notifier+0x55/0xb0
notifier_call_chain+0x5a/0xd0
blocking_notifier_call_chain+0x41/0x60
bus_notify+0x34/0x50
device_del+0x269/0x3d0
pci_remove_bus_device+0x77/0x100
p2sb_bar+0xae/0x1d0
...
i801_probe+0x423/0x740
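The faulting path is in the old dmar_remove_one_dev_info() (removed by
the diff below); a minimal sketch of the failure, assuming the deferred
attach never assigned info->domain:

	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct dmar_domain *domain = info->domain; /* NULL: attach deferred */
	unsigned long flags;

	/* dereferences &domain->lock with domain == NULL */
	spin_lock_irqsave(&domain->lock, flags);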
Fix it by using the release_domain mechanism. The scalable-mode context
entry, which is not covered by release_domain, is cleared in
release_device() instead.
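For reference, a rough sketch of the core-side ordering this relies on,
simplified from iommu_deinit_device() in drivers/iommu/iommu.c (the
exact upstream code may differ):

	const struct iommu_ops *ops = dev_iommu_ops(dev);

	/*
	 * Attach the driver's release domain (here: the blocking domain)
	 * first, so the device stops using any previously attached domain.
	 */
	if (ops->release_domain)
		ops->release_domain->ops->attach_dev(ops->release_domain, dev);

	/* Driver-specific cleanup no longer needs a valid info->domain. */
	if (ops->release_device)
		ops->release_device(dev);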
Fixes: 586081d3f6b1 ("iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO")
Reported-by: Eric Badger <ebadger@...estorage.com>
Closes: https://lore.kernel.org/r/20240113181713.1817855-1-ebadger@purestorage.com
Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
---
drivers/iommu/intel/pasid.h | 1 +
drivers/iommu/intel/iommu.c | 31 +++-----------
drivers/iommu/intel/pasid.c | 83 +++++++++++++++++++++++++++++++++++++
3 files changed, 90 insertions(+), 25 deletions(-)
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 487ede039bdd..42fda97fd851 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -318,4 +318,5 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
bool fault_ignore);
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
+void intel_pasid_teardown_sm_context(struct device *dev);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cc3994efd362..f74d42d3258f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3869,30 +3869,6 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}
-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
- struct intel_iommu *iommu = info->iommu;
- unsigned long flags;
-
- if (!dev_is_real_dma_subdevice(info->dev)) {
- if (dev_is_pci(info->dev) && sm_supported(iommu))
- intel_pasid_tear_down_entry(iommu, info->dev,
- IOMMU_NO_PASID, false);
-
- iommu_disable_pci_caps(info);
- domain_context_clear(info);
- }
-
- spin_lock_irqsave(&domain->lock, flags);
- list_del(&info->link);
- spin_unlock_irqrestore(&domain->lock, flags);
-
- domain_detach_iommu(domain, iommu);
- info->domain = NULL;
-}
-
/*
* Clear the page table pointer in context or pasid table entries so that
* all DMA requests without PASID from the device are blocked. If the page
@@ -4431,7 +4407,11 @@ static void intel_iommu_release_device(struct device *dev)
mutex_lock(&iommu->iopf_lock);
device_rbtree_remove(info);
mutex_unlock(&iommu->iopf_lock);
- dmar_remove_one_dev_info(dev);
+
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
+ !context_copied(iommu, info->bus, info->devfn))
+ intel_pasid_teardown_sm_context(dev);
+
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
@@ -4922,6 +4902,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
+ .release_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 108158e2b907..52068cf52fe2 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -667,3 +667,86 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return 0;
}
+
+/*
+ * Interfaces to set up or tear down a pasid table for a scalable-mode
+ * context table entry:
+ */
+
+/*
+ * Cache invalidation for changes to a scalable-mode context table
+ * entry.
+ *
+ * Section 6.5.3.3 of the VT-d spec:
+ * - Device-selective context-cache invalidation;
+ * - Domain-selective PASID-cache invalidation to affected domains
+ * (can be skipped if all PASID entries were not-present);
+ * - Domain-selective IOTLB invalidation to affected domains;
+ * - Global Device-TLB invalidation to affected functions.
+ *
+ * Note that RWBF (Required Write-Buffer Flushing) capability has
+ * been deprecated for scalable mode. Section 11.4.2 of the VT-d spec:
+ *
+ * HRWBF: Hardware implementations reporting Scalable Mode Translation
+ * Support (SMTS) as Set also report this field as Clear.
+ */
+static void sm_context_flush_caches(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ iommu->flush.flush_context(iommu, 0, PCI_DEVID(info->bus, info->devfn),
+ DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+}
+
+static void context_entry_teardown_pasid_table(struct intel_iommu *iommu,
+ struct context_entry *context)
+{
+ context_clear_entry(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+}
+
+static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, false);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return;
+ }
+
+ context_entry_teardown_pasid_table(iommu, context);
+ spin_unlock(&iommu->lock);
+
+ sm_context_flush_caches(dev);
+}
+
+static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev == &pdev->dev)
+ device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
+
+ return 0;
+}
+
+void intel_pasid_teardown_sm_context(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev)) {
+ device_pasid_table_teardown(dev, info->bus, info->devfn);
+ return;
+ }
+
+ pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
+}
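A note on the DMA alias walk above: pci_for_each_dma_alias() also
invokes the callback for upstream bridges that take ownership of the
transaction, which is why pci_pasid_table_teardown() checks
dev == &pdev->dev before tearing anything down. An illustrative
(hypothetical) callback showing the assumed semantics:

	static int show_alias(struct pci_dev *pdev, u16 alias, void *data)
	{
		/*
		 * pdev is the released device for its own requester IDs,
		 * or an upstream bridge for aliases the bridge generates.
		 */
		dev_info(&pdev->dev, "alias %02x:%02x.%d\n",
			 PCI_BUS_NUM(alias), PCI_SLOT(alias & 0xff),
			 PCI_FUNC(alias & 0xff));
		return 0;	/* nonzero would abort the walk */
	}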
--
2.34.1