[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251209165019.2643142-14-mhonap@nvidia.com>
Date: Tue, 9 Dec 2025 22:20:17 +0530
From: <mhonap@...dia.com>
To: <aniketa@...dia.com>, <ankita@...dia.com>, <alwilliamson@...dia.com>,
<vsethi@...dia.com>, <jgg@...dia.com>, <mochs@...dia.com>,
<skolothumtho@...dia.com>, <alejandro.lucero-palau@....com>,
<dave@...olabs.net>, <jonathan.cameron@...wei.com>, <dave.jiang@...el.com>,
<alison.schofield@...el.com>, <vishal.l.verma@...el.com>,
<ira.weiny@...el.com>, <dan.j.williams@...el.com>, <jgg@...pe.ca>,
<yishaih@...dia.com>, <kevin.tian@...el.com>
CC: <cjia@...dia.com>, <kwankhede@...dia.com>, <targupta@...dia.com>,
<zhiw@...dia.com>, <kjaju@...dia.com>, <linux-kernel@...r.kernel.org>,
<linux-cxl@...r.kernel.org>, <kvm@...r.kernel.org>, <mhonap@...dia.com>
Subject: [RFC v2 13/15] vfio/pci: introduce CXL device awareness
From: Zhi Wang <zhiw@...dia.com>
CXL device programming interfaces are built upon PCI interfaces. Thus
the vfio-pci-core can be leveraged to handle a CXL device.
However, a CXL device also differs from a PCI device:
- No INTX support, only MSI/MSIX is supported.
- Reset is done via CXL reset. FLR only resets CXL.io.
Introduce CXL device awareness to vfio-pci-core. Expose a new VFIO
device flag to userspace to identify that the VFIO device is a CXL
device. Disable INTX support in vfio-pci-core. Disable FLR reset for
the CXL device, as the kernel CXL core does not support CXL reset yet.
Disable mmap support on the CXL MMIO BAR in vfio-pci-core.
Signed-off-by: Zhi Wang <zhiw@...dia.com>
Signed-off-by: Manish Honap <mhonap@...dia.com>
---
drivers/vfio/pci/vfio_cxl_core.c | 18 +++++++++++++++++
drivers/vfio/pci/vfio_pci_core.c | 33 ++++++++++++++++++++++++++++----
drivers/vfio/pci/vfio_pci_rdwr.c | 11 ++++++++---
include/linux/vfio_pci_core.h | 3 +++
include/uapi/linux/vfio.h | 10 ++++++++++
5 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index c0bdf55997da..84e4f42d97de 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -25,6 +25,19 @@
#define DRIVER_AUTHOR "Zhi Wang <zhiw@...dia.com>"
#define DRIVER_DESC "core driver for VFIO based CXL devices"
+static void init_cxl_cap(struct vfio_cxl_core_device *cxl)
+{
+ struct vfio_pci_core_device *pci = &cxl->pci_core;
+ struct vfio_device_info_cap_cxl *cap = &pci->cxl_cap;
+
+ cap->header.id = VFIO_DEVICE_INFO_CAP_CXL;
+ cap->header.version = 1;
+ cap->hdm_count = cxl->hdm_count;
+ cap->hdm_reg_offset = cxl->comp_reg_offset + cxl->hdm_reg_offset;
+ cap->hdm_reg_size = cxl->hdm_reg_size;
+ cap->hdm_reg_bar_index = cxl->comp_reg_bar;
+}
+
/* Standard CXL-type 2 driver initialization sequence */
static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
struct vfio_cxl_dev_info *info)
@@ -74,6 +87,8 @@ static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
if (IS_ERR(cxl_core->cxlmd))
return PTR_ERR(cxl_core->cxlmd);
+ init_cxl_cap(cxl);
+
cxl_core->region.noncached = info->noncached_region;
return 0;
@@ -266,6 +281,9 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
if (ret)
return ret;
+ pci->is_cxl = true;
+ pci->comp_reg_bar = cxl->comp_reg_bar;
+
ret = vfio_pci_core_enable(pci);
if (ret)
goto err_pci_core_enable;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 502880e927fc..5f8334748841 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -483,7 +483,12 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
goto out_power;
/* If reset fails because of the device lock, fail this path entirely */
- ret = pci_try_reset_function(pdev);
+ if (!vdev->is_cxl)
+ ret = pci_try_reset_function(pdev);
+ else
+ /* TODO: CXL reset support is on-going. */
+ ret = -ENODEV;
+
if (ret == -EAGAIN)
goto out_disable_device;
@@ -618,8 +623,12 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
if (!vdev->barmap[bar])
continue;
pci_iounmap(pdev, vdev->barmap[bar]);
- pci_release_selected_regions(pdev, 1 << bar);
vdev->barmap[bar] = NULL;
+
+ if (vdev->is_cxl && i == vdev->comp_reg_bar)
+ continue;
+
+ pci_release_selected_regions(pdev, 1 << bar);
}
list_for_each_entry_safe(dummy_res, tmp,
@@ -960,6 +969,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
if (vdev->reset_works)
info.flags |= VFIO_DEVICE_FLAGS_RESET;
+ if (vdev->is_cxl) {
+ ret = vfio_info_add_capability(&caps, &vdev->cxl_cap.header,
+ sizeof(vdev->cxl_cap));
+ if (ret)
+ return ret;
+
+ info.flags |= VFIO_DEVICE_FLAGS_CXL;
+ }
+
info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
@@ -1752,14 +1770,21 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
* we need to request the region and the barmap tracks that.
*/
if (!vdev->barmap[index]) {
+ int bars;
+
+ if (vdev->is_cxl && vdev->comp_reg_bar == index)
+ bars = 0;
+ else
+ bars = 1 << index;
+
ret = pci_request_selected_regions(pdev,
- 1 << index, "vfio-pci");
+ bars, "vfio-pci");
if (ret)
return ret;
vdev->barmap[index] = pci_iomap(pdev, index, 0);
if (!vdev->barmap[index]) {
- pci_release_selected_regions(pdev, 1 << index);
+ pci_release_selected_regions(pdev, bars);
return -ENOMEM;
}
}
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 6192788c8ba3..057cd0c69f2a 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -201,19 +201,24 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
struct pci_dev *pdev = vdev->pdev;
- int ret;
+ int bars, ret;
void __iomem *io;
if (vdev->barmap[bar])
return 0;
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
+ if (vdev->is_cxl && vdev->comp_reg_bar == bar)
+ bars = 0;
+ else
+ bars = 1 << bar;
+
+ ret = pci_request_selected_regions(pdev, bars, "vfio");
if (ret)
return ret;
io = pci_iomap(pdev, bar, 0);
if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
+ pci_release_selected_regions(pdev, bars);
return -ENOMEM;
}
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 8293910e0a96..0a354c7788b3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -82,6 +82,9 @@ struct vfio_pci_core_device {
bool needs_pm_restore:1;
bool pm_intx_masked:1;
bool pm_runtime_engaged:1;
+ bool is_cxl:1;
+ int comp_reg_bar;
+ struct vfio_device_info_cap_cxl cxl_cap;
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
int ioeventfds_nr;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 95be987d2ed5..0a9968cd6601 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -214,6 +214,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL (1 << 9) /* Device supports CXL */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
__u32 cap_offset; /* Offset within info struct of first cap */
@@ -256,6 +257,15 @@ struct vfio_device_info_cap_pci_atomic_comp {
__u32 reserved;
};
+#define VFIO_DEVICE_INFO_CAP_CXL 6
+struct vfio_device_info_cap_cxl {
+ struct vfio_info_cap_header header;
+ __u8 hdm_count;
+ __u8 hdm_reg_bar_index;
+ __u64 hdm_reg_size;
+ __u64 hdm_reg_offset;
+};
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
--
2.25.1
Powered by blists - more mailing lists