[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220124181726.19174-5-abhsahu@nvidia.com>
Date: Mon, 24 Jan 2022 23:47:25 +0530
From: Abhishek Sahu <abhsahu@...dia.com>
To: <kvm@...r.kernel.org>,
Alex Williamson <alex.williamson@...hat.com>,
Cornelia Huck <cohuck@...hat.com>
CC: Max Gurtovoy <mgurtovoy@...dia.com>,
Yishai Hadas <yishaih@...dia.com>,
Zhen Lei <thunder.leizhen@...wei.com>,
Jason Gunthorpe <jgg@...dia.com>,
<linux-kernel@...r.kernel.org>, Abhishek Sahu <abhsahu@...dia.com>
Subject: [RFC PATCH v2 4/5] vfio/pci: Invalidate mmaps and block the access in D3hot power state
According to [PCIe v5 5.3.1.4.1] for D3hot state
"Configuration and Message requests are the only TLPs accepted by a
Function in the D3Hot state. All other received Requests must be
handled as Unsupported Requests, and all received Completions may
optionally be handled as Unexpected Completions."
Currently, if the vfio PCI device has been put into D3hot state and if
user makes non-config related read/write request in D3hot state, these
requests will be forwarded to the host and this access may cause
issues on a few systems.
This patch leverages the memory-disable support added in commit
'abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access on
disabled memory")' to generate page fault on mmap access and
return error for the direct read/write. If the device is D3hot state,
then the error needs to be returned for all kinds of BAR
related access (memory, IO and ROM). Also, the power related structure
fields need to be protected so we can use the same 'memory_lock' to
protect these fields also. For the few cases, this 'memory_lock' will be
already acquired by callers so introduce a separate function
vfio_pci_set_power_state_locked(). The original
vfio_pci_set_power_state() now contains the code to do the locking
related operations.
Signed-off-by: Abhishek Sahu <abhsahu@...dia.com>
---
drivers/vfio/pci/vfio_pci_core.c | 47 +++++++++++++++++++++++++-------
drivers/vfio/pci/vfio_pci_rdwr.c | 20 ++++++++++----
2 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index ee2fb8af57fa..38440d48973f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -201,11 +201,12 @@ static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
}
/*
- * pci_set_power_state() wrapper handling devices which perform a soft reset on
- * D3->D0 transition. Save state prior to D0/1/2->D3, stash it on the vdev,
- * restore when returned to D0. Saved separately from pci_saved_state for use
- * by PM capability emulation and separately from pci_dev internal saved state
- * to avoid it being overwritten and consumed around other resets.
+ * vfio_pci_set_power_state_locked() wrapper handling devices which perform a
+ * soft reset on D3->D0 transition. Save state prior to D0/1/2->D3, stash it
+ * on the vdev, restore when returned to D0. Saved separately from
+ * pci_saved_state for use by PM capability emulation and separately from
+ * pci_dev internal saved state to avoid it being overwritten and consumed
+ * around other resets.
*
* There are few cases where the PCI power state can be changed to D0
* without the involvement of this API. So, cache the power state locally
@@ -215,7 +216,8 @@ static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
* The memory taken for saving this PCI state needs to be freed to
* prevent memory leak.
*/
-int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state)
+static int vfio_pci_set_power_state_locked(struct vfio_pci_core_device *vdev,
+ pci_power_t state)
{
struct pci_dev *pdev = vdev->pdev;
bool needs_restore = false, needs_save = false;
@@ -260,6 +262,26 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat
return ret;
}
+/*
+ * vfio_pci_set_power_state() takes all the required locks to protect
+ * the access of power related variables and then invokes
+ * vfio_pci_set_power_state_locked().
+ */
+int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
+ pci_power_t state)
+{
+ int ret;
+
+ if (state >= PCI_D3hot)
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
+ else
+ down_write(&vdev->memory_lock);
+
+ ret = vfio_pci_set_power_state_locked(vdev, state);
+ up_write(&vdev->memory_lock);
+ return ret;
+}
+
int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
@@ -354,7 +376,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
* in running the logic needed for D0 power state. The subsequent
* runtime PM API's will put the device into the low power state again.
*/
- vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_set_power_state_locked(vdev, PCI_D0);
/* Stop the device from further DMA */
pci_clear_master(pdev);
@@ -967,7 +989,7 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
* interaction. Update the power state in vfio driver to perform
* the logic needed for D0 power state.
*/
- vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_set_power_state_locked(vdev, PCI_D0);
up_write(&vdev->memory_lock);
return ret;
@@ -1453,6 +1475,11 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
goto up_out;
}
+ if (vdev->power_state >= PCI_D3hot) {
+ ret = VM_FAULT_SIGBUS;
+ goto up_out;
+ }
+
/*
* We populate the whole vma on fault, so we need to test whether
* the vma has already been mapped, such as for concurrent faults
@@ -1902,7 +1929,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
* be able to get to D3. Therefore first do a D0 transition
* before enabling runtime PM.
*/
- vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_set_power_state_locked(vdev, PCI_D0);
pm_runtime_allow(&pdev->dev);
if (!disable_idle_d3)
@@ -2117,7 +2144,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
* interaction. Update the power state in vfio driver to perform
* the logic needed for D0 power state.
*/
- vfio_pci_set_power_state(cur, PCI_D0);
+ vfio_pci_set_power_state_locked(cur, PCI_D0);
if (cur == cur_mem)
is_mem = false;
if (cur == cur_vma)
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 57d3b2cbbd8e..e97ba14c4aa0 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -41,8 +41,13 @@
static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \
bool test_mem, u##size val, void __iomem *io) \
{ \
+ down_read(&vdev->memory_lock); \
+ if (vdev->power_state >= PCI_D3hot) { \
+ up_read(&vdev->memory_lock); \
+ return -EIO; \
+ } \
+ \
if (test_mem) { \
- down_read(&vdev->memory_lock); \
if (!__vfio_pci_memory_enabled(vdev)) { \
up_read(&vdev->memory_lock); \
return -EIO; \
@@ -51,8 +56,7 @@ static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \
\
vfio_iowrite##size(val, io); \
\
- if (test_mem) \
- up_read(&vdev->memory_lock); \
+ up_read(&vdev->memory_lock); \
\
return 0; \
}
@@ -68,8 +72,13 @@ VFIO_IOWRITE(64)
static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \
bool test_mem, u##size *val, void __iomem *io) \
{ \
+ down_read(&vdev->memory_lock); \
+ if (vdev->power_state >= PCI_D3hot) { \
+ up_read(&vdev->memory_lock); \
+ return -EIO; \
+ } \
+ \
if (test_mem) { \
- down_read(&vdev->memory_lock); \
if (!__vfio_pci_memory_enabled(vdev)) { \
up_read(&vdev->memory_lock); \
return -EIO; \
@@ -78,8 +87,7 @@ static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \
\
*val = vfio_ioread##size(io); \
\
- if (test_mem) \
- up_read(&vdev->memory_lock); \
+ up_read(&vdev->memory_lock); \
\
return 0; \
}
--
2.17.1
Powered by blists - more mailing lists