[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7889db2790263640c6e9bb98956c3a3d55b87ee6.1751096303.git.nicolinc@nvidia.com>
Date: Sat, 28 Jun 2025 00:42:42 -0700
From: Nicolin Chen <nicolinc@...dia.com>
To: <jgg@...dia.com>, <joro@...tes.org>, <will@...nel.org>,
<robin.murphy@....com>, <rafael@...nel.org>, <lenb@...nel.org>,
<bhelgaas@...gle.com>
CC: <iommu@...ts.linux.dev>, <linux-kernel@...r.kernel.org>,
<linux-acpi@...r.kernel.org>, <linux-pci@...r.kernel.org>,
<patches@...ts.linux.dev>, <pjaroszynski@...dia.com>, <vsethi@...dia.com>,
<helgaas@...nel.org>, <baolu.lu@...ux.intel.com>
Subject: [PATCH RFC v2 4/4] pci: Suspend iommu function prior to resetting a device
PCIe permits a device to ignore ATS invalidation TLPs while it is processing
a reset. This creates a problem visible to the OS: an ATS invalidation
command can time out. For example, an SVA domain has no coordination with a
reset event and can racily issue ATS invalidations to a resetting device.
The PCIe spec, in the sec 10.3.1 IMPLEMENTATION NOTE, recommends disabling
and blocking ATS before initiating a Function Level Reset. It also mentions
that other reset methods could have the same vulnerability.
The iommu_dev_reset_prepare/done() helpers have been introduced for this
purpose. Use them in all the existing reset functions; they attach the device
to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow the IOMMU driver to:
- invoke pci_disable_ats() and pci_enable_ats() respectively
- wait for all ATS invalidations to complete
- stop issuing new ATS invalidations
- fence any incoming ATS queries
Add a warning if ATS isn't disabled, in which case the IOMMU driver should be
fixed to disable ATS following the design in iommu_dev_reset_prepare().
Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
---
drivers/pci/pci-acpi.c | 21 ++++++++++-
drivers/pci/pci.c | 84 +++++++++++++++++++++++++++++++++++++++---
drivers/pci/quirks.c | 27 +++++++++++++-
3 files changed, 124 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index b78e0e417324..727957f193ca 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -9,6 +9,7 @@
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/iommu.h>
#include <linux/irqdomain.h>
#include <linux/pci.h>
#include <linux/msi.h>
@@ -974,6 +975,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
{
acpi_handle handle = ACPI_HANDLE(&dev->dev);
+ int ret = 0;
if (!handle || !acpi_has_method(handle, "_RST"))
return -ENOTTY;
@@ -981,12 +983,27 @@ int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
if (probe)
return 0;
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
+ * before initiating a reset. Notify the iommu driver that enabled ATS.
+ */
+ ret = iommu_dev_reset_prepare(&dev->dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ /* Something wrong with the iommu driver that failed to disable ATS */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
pci_warn(dev, "ACPI _RST failed\n");
- return -ENOTTY;
+ ret = -ENOTTY;
}
- return 0;
+ iommu_dev_reset_done(&dev->dev);
+ return ret;
}
bool acpi_pci_power_manageable(struct pci_dev *dev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e9448d55113b..ddb7a10ef500 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/init.h>
+#include <linux/iommu.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
@@ -4518,13 +4519,30 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction);
*/
int pcie_flr(struct pci_dev *dev)
{
+ int ret = 0;
+
if (!pci_wait_for_pending_transaction(dev))
pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
+ * before initiating a reset. Notify the iommu driver that enabled ATS.
+ * Have to call it after waiting for pending DMA transaction.
+ */
+ ret = iommu_dev_reset_prepare(&dev->dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ /* Something wrong with the iommu driver that failed to disable ATS */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
if (dev->imm_ready)
- return 0;
+ goto done;
/*
* Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
@@ -4533,7 +4551,11 @@ int pcie_flr(struct pci_dev *dev)
*/
msleep(100);
- return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+
+done:
+ iommu_dev_reset_done(&dev->dev);
+ return ret;
}
EXPORT_SYMBOL_GPL(pcie_flr);
@@ -4561,6 +4583,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr);
static int pci_af_flr(struct pci_dev *dev, bool probe)
{
+ int ret = 0;
int pos;
u8 cap;
@@ -4587,10 +4610,25 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
PCI_AF_STATUS_TP << 8))
pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
+ * before initiating a reset. Notify the iommu driver that enabled ATS.
+ * Have to call it after waiting for pending DMA transaction.
+ */
+ ret = iommu_dev_reset_prepare(&dev->dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ /* Something wrong with the iommu driver that failed to disable ATS */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
if (dev->imm_ready)
- return 0;
+ goto done;
/*
* Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
@@ -4600,7 +4638,11 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
*/
msleep(100);
- return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+
+done:
+ iommu_dev_reset_done(&dev->dev);
+ return ret;
}
/**
@@ -4621,6 +4663,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
static int pci_pm_reset(struct pci_dev *dev, bool probe)
{
u16 csr;
+ int ret;
if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
return -ENOTTY;
@@ -4635,6 +4678,20 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
if (dev->current_state != PCI_D0)
return -EINVAL;
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
+ * before initiating a reset. Notify the iommu driver that enabled ATS.
+ */
+ ret = iommu_dev_reset_prepare(&dev->dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ /* Something wrong with the iommu driver that failed to disable ATS */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
csr &= ~PCI_PM_CTRL_STATE_MASK;
csr |= PCI_D3hot;
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
@@ -4645,7 +4702,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
pci_dev_d3_sleep(dev);
- return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+ ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+ iommu_dev_reset_done(&dev->dev);
+ return ret;
}
/**
@@ -5100,6 +5159,20 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
if (rc)
return -ENOTTY;
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS
+ * before initiating a reset. Notify the iommu driver that enabled ATS.
+ */
+ rc = iommu_dev_reset_prepare(&dev->dev);
+ if (rc) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return rc;
+ }
+
+ /* Something wrong with the iommu driver that failed to disable ATS */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
val = reg;
} else {
@@ -5114,6 +5187,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
reg);
+ iommu_dev_reset_done(&dev->dev);
return rc;
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index d7f4ee634263..7a66c01392d9 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -21,6 +21,7 @@
#include <linux/pci.h>
#include <linux/isa-dma.h> /* isa_dma_bridge_buggy */
#include <linux/init.h>
+#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
@@ -4223,6 +4224,30 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ 0 }
};
+static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,
+ const struct pci_dev_reset_methods *i)
+{
+ int ret;
+
+ /*
+ * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, disable ATS before a reset.
+ * NOTE(review): runs even when probe is true -- confirm that is intended.
+ */
+ ret = iommu_dev_reset_prepare(&dev->dev);
+ if (ret) {
+ pci_err(dev, "failed to stop IOMMU\n");
+ return ret;
+ }
+
+ /* ATS still enabled here means the IOMMU driver failed to disable it */
+ if (dev->ats_enabled)
+ pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n");
+
+ ret = i->reset(dev, probe);
+ iommu_dev_reset_done(&dev->dev);
+ return ret;
+}
+
/*
* These device-specific reset methods are here rather than in a driver
* because when a host assigns a device to a guest VM, the host may need
@@ -4237,7 +4262,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
i->vendor == (u16)PCI_ANY_ID) &&
(i->device == dev->device ||
i->device == (u16)PCI_ANY_ID))
- return i->reset(dev, probe);
+ return __pci_dev_specific_reset(dev, probe, i);
}
return -ENOTTY;
--
2.43.0
Powered by blists - more mailing lists