[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251218081650.555015-2-kevin.tian@intel.com>
Date: Thu, 18 Dec 2025 08:16:49 +0000
From: Kevin Tian <kevin.tian@...el.com>
To: Alex Williamson <alex@...zbot.org>,
Ankit Agrawal <ankita@...dia.com>
Cc: Jason Gunthorpe <jgg@...pe.ca>,
Yishai Hadas <yishaih@...dia.com>,
Shameer Kolothum <skolothumtho@...dia.com>,
Kevin Tian <kevin.tian@...el.com>,
Ramesh Thomas <ramesh.thomas@...el.com>,
Yunxiang Li <Yunxiang.Li@....com>,
kvm@...r.kernel.org,
linux-kernel@...r.kernel.org,
Farrah Chen <farrah.chen@...el.com>,
stable@...r.kernel.org
Subject: [PATCH v2 1/2] vfio/pci: Disable qword access to the PCI ROM bar
Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio
pci") enables qword access to the PCI bar resources. However certain
devices (e.g. Intel X710) are observed with problem upon qword accesses
to the rom bar, e.g. triggering PCI aer errors.
This is triggered by Qemu which caches the rom content by simply does a
pread() of the remaining size until it gets the full contents. The other
bars would only perform operations at the same access width as their
guest drivers.
Instead of trying to identify all broken devices, universally disable
qword access to the rom bar i.e. going back to the old way which worked
reliably for years.
Reported-by: Farrah Chen <farrah.chen@...el.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740
Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci")
Cc: stable@...r.kernel.org
Signed-off-by: Kevin Tian <kevin.tian@...el.com>
---
drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++--
drivers/vfio/pci/vfio_pci_rdwr.c | 25 ++++++++++++++++++-------
include/linux/vfio_pci_core.h | 10 +++++++++-
3 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index 84d142a47ec6..b45a24d00387 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -561,7 +561,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev,
ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
nvdev->resmem.ioaddr,
buf, offset, mem_count,
- 0, 0, false);
+ 0, 0, false, VFIO_PCI_IO_WIDTH_8);
}
return ret;
@@ -693,7 +693,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev,
ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
nvdev->resmem.ioaddr,
(char __user *)buf, pos, mem_count,
- 0, 0, true);
+ 0, 0, true, VFIO_PCI_IO_WIDTH_8);
}
return ret;
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 6192788c8ba3..25380b7dfe18 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -135,7 +135,8 @@ VFIO_IORDWR(64)
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
void __iomem *io, char __user *buf,
loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite)
+ size_t x_end, bool iswrite,
+ enum vfio_pci_io_width max_width)
{
ssize_t done = 0;
int ret;
@@ -150,20 +151,19 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
else
fillable = 0;
- if (fillable >= 8 && !(off % 8)) {
+ if (fillable >= 8 && !(off % 8) && max_width >= 8) {
ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
io, buf, off, &filled);
if (ret)
return ret;
- } else
- if (fillable >= 4 && !(off % 4)) {
+ } else if (fillable >= 4 && !(off % 4) && max_width >= 4) {
ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
io, buf, off, &filled);
if (ret)
return ret;
- } else if (fillable >= 2 && !(off % 2)) {
+ } else if (fillable >= 2 && !(off % 2) && max_width >= 2) {
ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
io, buf, off, &filled);
if (ret)
@@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
void __iomem *io;
struct resource *res = &vdev->pdev->resource[bar];
ssize_t done;
+ enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8;
if (pci_resource_start(pdev, bar))
end = pci_resource_len(pdev, bar);
@@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
if (!io)
return -ENOMEM;
x_end = end;
+
+ /*
+ * Certain devices (e.g. Intel X710) don't support qword
+ * access to the ROM bar. Otherwise PCI AER errors might be
+ * triggered.
+ *
+ * Disable qword access to the ROM bar universally, which
+ * worked reliably for years before qword access is enabled.
+ */
+ max_width = VFIO_PCI_IO_WIDTH_4;
} else {
int ret = vfio_pci_core_setup_barmap(vdev, bar);
if (ret) {
@@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
}
done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
- count, x_start, x_end, iswrite);
+ count, x_start, x_end, iswrite, max_width);
if (done >= 0)
*ppos += done;
@@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
* to the memory enable bit in the command register.
*/
done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
- 0, 0, iswrite);
+ 0, 0, iswrite, VFIO_PCI_IO_WIDTH_8);
vga_put(vdev->pdev, rsrc);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 706877f998ff..1ac86896875c 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -145,6 +145,13 @@ struct vfio_pci_core_device {
struct list_head dmabufs;
};
+enum vfio_pci_io_width {
+ VFIO_PCI_IO_WIDTH_1 = 1,
+ VFIO_PCI_IO_WIDTH_2 = 2,
+ VFIO_PCI_IO_WIDTH_4 = 4,
+ VFIO_PCI_IO_WIDTH_8 = 8,
+};
+
/* Will be exported for vfio pci drivers usage */
int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
unsigned int type, unsigned int subtype,
@@ -188,7 +195,8 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
void __iomem *io, char __user *buf,
loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite);
+ size_t x_end, bool iswrite,
+ enum vfio_pci_io_width max_width);
bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
loff_t reg_start, size_t reg_cnt,
--
2.43.0
Powered by blists - more mailing lists