Message-Id: <20240227021441.50434-9-baolu.lu@linux.intel.com>
Date: Tue, 27 Feb 2024 10:14:41 +0800
From: Lu Baolu <baolu.lu@...ux.intel.com>
To: Joerg Roedel <joro@...tes.org>
Cc: Tina Zhang <tina.zhang@...el.com>,
Erick Archer <erick.archer@....com>,
Jingqi Liu <Jingqi.liu@...el.com>,
iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org
Subject: [PATCH 8/8] iommu/vt-d: Use device rbtree in iopf reporting path
The I/O page fault handler currently locates the PCI device by calling
pci_get_domain_bus_and_slot(), which walks the list of all PCI devices
until the desired device is found. To improve lookup efficiency, replace
it with device_rbtree_find(), which searches only the rbtree of devices
probed by this IOMMU.
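
For context, device_rbtree_find() (introduced by an earlier patch in this
series) amounts to an rb_find() over a tree keyed by the PCI requester ID,
taken under the rbtree lock, so the lookup is O(log n) instead of the O(n)
list walk in pci_get_domain_bus_and_slot(). Below is a minimal sketch of
how such a lookup can be implemented, using rb_find() from
<linux/rbtree.h> and PCI_DEVID() from <linux/pci.h>; the comparison helper
and exact locking are illustrative, not necessarily the literal driver
code:

static int device_rid_cmp_key(const void *key, const struct rb_node *node)
{
	struct device_domain_info *info =
		rb_entry(node, struct device_domain_info, node);
	const u16 *rid = key;

	/* The tree is ordered by requester ID (bus << 8 | devfn). */
	if (*rid < PCI_DEVID(info->bus, info->devfn))
		return -1;
	if (*rid > PCI_DEVID(info->bus, info->devfn))
		return 1;

	return 0;
}

struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
{
	struct device_domain_info *info = NULL;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
	if (node)
		info = rb_entry(node, struct device_domain_info, node);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);

	return info ? info->dev : NULL;
}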
An I/O page fault is initiated by the device, and there is no
hardware/software synchronization mechanism that guarantees the device
stays in the probed device rbtree while the fault is being reported.
Theoretically, a device could be released by the IOMMU subsystem after
device_rbtree_find() and before iopf_get_dev_fault_param(), which would
cause a use-after-free problem.
Add a mutex to synchronize the I/O page fault reporting path and the IOMMU
device release path. In practice the lock introduces no measurable
performance overhead, since contention between I/O page fault reporting
and device release is very rare.
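
Taking a mutex is permissible here because prq_event_thread() runs in
threaded (sleepable) interrupt context. Conceptually, the two paths
serialize as in the simplified sketch below; this is the scheme the diff
implements, not the literal driver code:

	/* Fault reporting path -- prq_event_thread() */
	mutex_lock(&iommu->iopf_lock);
	dev = device_rbtree_find(iommu, req->rid);
	if (dev)
		/* dev cannot be released while the lock is held */
		intel_svm_prq_report(iommu, dev, req);
	mutex_unlock(&iommu->iopf_lock);

	/* Release path -- intel_iommu_release_device() */
	mutex_lock(&iommu->iopf_lock);
	/* after this, new faults can no longer find the device */
	device_rbtree_remove(info);
	mutex_unlock(&iommu->iopf_lock);
	/* any in-flight report has finished; tear-down is now safe */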
Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@...dia.com>
Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com
---
drivers/iommu/intel/iommu.h | 2 ++
drivers/iommu/intel/dmar.c | 1 +
drivers/iommu/intel/iommu.c | 3 +++
drivers/iommu/intel/svm.c | 17 +++++++++--------
4 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index df00240ebe90..cd267ba64eda 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -719,6 +719,8 @@ struct intel_iommu {
 #endif
 	struct iopf_queue *iopf_queue;
 	unsigned char iopfq_name[16];
+	/* Synchronization between fault report and iommu device release. */
+	struct mutex iopf_lock;
 	struct q_inval *qi;		/* Queued invalidation info */
 	u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
 
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index f9b63c2875f7..d14797aabb7a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->segment = drhd->segment;
 	iommu->device_rbtree = RB_ROOT;
 	spin_lock_init(&iommu->device_rbtree_lock);
+	mutex_init(&iommu->iopf_lock);
 	iommu->node = NUMA_NO_NODE;
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5568f17d867f..eaa648c6c389 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4427,8 +4427,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 static void intel_iommu_release_device(struct device *dev)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
 
+	mutex_lock(&iommu->iopf_lock);
 	device_rbtree_remove(info);
+	mutex_unlock(&iommu->iopf_lock);
 	dmar_remove_one_dev_info(dev);
 	intel_pasid_free_table(dev);
 	intel_iommu_debugfs_remove_dev(info);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 1dd56d4eb88c..bdf3584ca0af 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	struct intel_iommu *iommu = d;
 	struct page_req_dsc *req;
 	int head, tail, handled;
-	struct pci_dev *pdev;
+	struct device *dev;
 	u64 address;
 
 	/*
@@ -689,23 +689,24 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
 			goto prq_advance;
 
-		pdev = pci_get_domain_bus_and_slot(iommu->segment,
-						   PCI_BUS_NUM(req->rid),
-						   req->rid & 0xff);
 		/*
 		 * If prq is to be handled outside iommu driver via receiver of
 		 * the fault notifiers, we skip the page response here.
 		 */
-		if (!pdev)
+		mutex_lock(&iommu->iopf_lock);
+		dev = device_rbtree_find(iommu, req->rid);
+		if (!dev) {
+			mutex_unlock(&iommu->iopf_lock);
 			goto bad_req;
+		}
 
-		if (intel_svm_prq_report(iommu, &pdev->dev, req))
+		if (intel_svm_prq_report(iommu, dev, req))
 			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 		else
-			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+			trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
 					 req->priv_data[0], req->priv_data[1],
 					 iommu->prq_seq_number++);
-		pci_dev_put(pdev);
+		mutex_unlock(&iommu->iopf_lock);
 prq_advance:
 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
 	}
--
2.34.1