Message-ID: <20251209165019.2643142-13-mhonap@nvidia.com>
Date: Tue, 9 Dec 2025 22:20:16 +0530
From: <mhonap@...dia.com>
To: <aniketa@...dia.com>, <ankita@...dia.com>, <alwilliamson@...dia.com>,
	<vsethi@...dia.com>, <jgg@...dia.com>, <mochs@...dia.com>,
	<skolothumtho@...dia.com>, <alejandro.lucero-palau@....com>,
	<dave@...olabs.net>, <jonathan.cameron@...wei.com>, <dave.jiang@...el.com>,
	<alison.schofield@...el.com>, <vishal.l.verma@...el.com>,
	<ira.weiny@...el.com>, <dan.j.williams@...el.com>, <jgg@...pe.ca>,
	<yishaih@...dia.com>, <kevin.tian@...el.com>
CC: <cjia@...dia.com>, <kwankhede@...dia.com>, <targupta@...dia.com>,
	<zhiw@...dia.com>, <kjaju@...dia.com>, <linux-kernel@...r.kernel.org>,
	<linux-cxl@...r.kernel.org>, <kvm@...r.kernel.org>, <mhonap@...dia.com>
Subject: [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space

From: Zhi Wang <zhiw@...dia.com>

CXL devices expose CXL DVSEC registers in their configuration space.
Many of these registers affect the behavior of the device, e.g.
enabling CXL.io/CXL.mem/CXL.cache.

However, these configuration registers are owned by the host, and a
virtualization policy must be applied when handling accesses from the
guest.

Introduce emulation of the CXL configuration space to handle guest
accesses to the virtual CXL configuration space.
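
For illustration only (this dispatch path is not part of this patch):
a guest config access is matched against the registered blocks and
routed to the per-register handlers, with unmatched offsets falling
back to the default vfio-pci config emulation. The sketch below
assumes struct vfio_emulated_regblock stores the offset, size, and
handler pointers passed to new_reg_block():

	static ssize_t emu_config_rw(struct vfio_cxl_core_device *cxl,
				     void *buf, u64 offset, u64 size,
				     bool iswrite)
	{
		struct vfio_emulated_regblock *block;

		/* find the emulation block covering this access, if any */
		list_for_each_entry(block, &cxl->config_regblocks_head, list) {
			if (offset < block->offset ||
			    offset + size > block->offset + block->size)
				continue;

			return iswrite ? block->write(cxl, buf, offset, size)
				       : block->read(cxl, buf, offset, size);
		}

		/* not an emulated CXL register; defer to vfio-pci */
		return -ENOENT;
	}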

Signed-off-by: Zhi Wang <zhiw@...dia.com>
Signed-off-by: Manish Honap <mhonap@...dia.com>
---
 drivers/vfio/pci/vfio_cxl_core_emu.c | 340 ++++++++++++++++++++++++++-
 drivers/vfio/pci/vfio_pci_config.c   |  10 +-
 include/linux/vfio_pci_core.h        |   4 +
 3 files changed, 346 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
index 6711ff8975ef..8037737838ba 100644
--- a/drivers/vfio/pci/vfio_cxl_core_emu.c
+++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
@@ -28,6 +28,334 @@ new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
 	return block;
 }
 
+static int new_config_block(struct vfio_cxl_core_device *cxl, u64 offset,
+			    u64 size, reg_handler_t *read, reg_handler_t *write)
+{
+	struct vfio_emulated_regblock *block;
+
+	block = new_reg_block(cxl, offset, size, read, write);
+	if (IS_ERR(block))
+		return PTR_ERR(block);
+
+	list_add_tail(&block->list, &cxl->config_regblocks_head);
+	return 0;
+}
+
+static ssize_t virt_config_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
+				    u64 offset, u64 size)
+{
+	memcpy(buf, cxl->config_virt + offset, size);
+	return size;
+}
+
+static ssize_t virt_config_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
+				     u64 offset, u64 size)
+{
+	memcpy(cxl->config_virt + offset, buf, size);
+	return size;
+}
+
+static ssize_t hw_config_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
+				  u64 offset, u64 size)
+{
+	return vfio_user_config_read(cxl->pci_core.pdev, offset, buf, size);
+}
+
+static ssize_t hw_config_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
+				   u64 offset, u64 size)
+{
+	__le32 write_val = *(__le32 *)buf;
+
+	return vfio_user_config_write(cxl->pci_core.pdev, offset, write_val, size);
+}
+
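+/*
+ * CXL Control: writes are dropped while the LOCK bit is set, reserved
+ * bits are masked, and CXL.io stays enabled.
+ */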
+static ssize_t cxl_control_write(struct vfio_cxl_core_device *cxl, void *buf,
+				 u64 offset, u64 size)
+{
+	u16 lock = le16_to_cpu(*(u16 *)(cxl->config_virt + cxl->dvsec + 0x14));
+	u16 cap3 = le16_to_cpu(*(u16 *)(cxl->config_virt + cxl->dvsec + 0x38));
+	u16 new_val = le16_to_cpu(*(u16 *)buf);
+	u16 rev_mask;
+
+	if (WARN_ON_ONCE(size != 2))
+		return -EINVAL;
+
+	/* register is locked */
+	if (lock & BIT(0))
+		return size;
+
+	/* handle reserved bits in the spec */
+	rev_mask = BIT(13) | BIT(15);
+
+	/* no direct p2p cap */
+	if (!(cap3 & BIT(4)))
+		rev_mask |= BIT(12);
+
+	new_val &= ~rev_mask;
+
+	/* CXL.io is always enabled. */
+	new_val |= BIT(1);
+
+	memcpy(cxl->config_virt + offset, &new_val, size);
+	return size;
+}
+
+static ssize_t cxl_status_write(struct vfio_cxl_core_device *cxl, void *buf,
+				u64 offset, u64 size)
+{
+	u16 cur_val = le16_to_cpu(*(u16 *)(cxl->config_virt + offset));
+	u16 new_val = le16_to_cpu(*(u16 *)buf);
+	u16 rev_mask = GENMASK(13, 0) | BIT(15);
+
+	if (WARN_ON_ONCE(size != 2))
+		return -EINVAL;
+
+	/* handle reserved bits in the spec */
+	new_val &= ~rev_mask;
+
+	/* emulate the RW1C bit: writing 1 clears it, writing 0 preserves it */
+	if (new_val & BIT(14)) {
+		new_val &= ~BIT(14);
+	} else {
+		/* bit 14 is not set in new_val here; keep the current value */
+		new_val |= cur_val & BIT(14);
+	}
+
+	new_val = cpu_to_le16(new_val);
+	memcpy(cxl->config_virt + offset, &new_val, size);
+	return size;
+}
+
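+/*
+ * CXL Control 2: mask reserved bits, forward the HW-backed bits to the
+ * device, and honor a requested CXL reset (TODO).
+ */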
+static ssize_t cxl_control_2_write(struct vfio_cxl_core_device *cxl, void *buf,
+				   u64 offset, u64 size)
+{
+	struct pci_dev *pdev = cxl->pci_core.pdev;
+	u16 cap2 = le16_to_cpu(*(u16 *)(cxl->config_virt + cxl->dvsec + 0x16));
+	u16 cap3 = le16_to_cpu(*(u16 *)(cxl->config_virt + cxl->dvsec + 0x38));
+	u16 new_val = le16_to_cpu(*(u16 *)buf);
+	u16 rev_mask = GENMASK(15, 6) | BIT(1) | BIT(2);
+	u16 hw_bits = BIT(0) | BIT(1) | BIT(3);
+	bool initiate_cxl_reset = new_val & BIT(2);
+
+	if (WARN_ON_ONCE(size != 2))
+		return -EINVAL;
+
+	/* no desired volatile HDM state after host reset */
+	if (!(cap3 & BIT(2)))
+		rev_mask |= BIT(4);
+
+	/* no modified completion enable */
+	if (!(cap2 & BIT(6)))
+		rev_mask |= BIT(5);
+
+	/* handle reserved bits in the spec */
+	new_val &= ~rev_mask;
+
+	/* bits go to the HW */
+	hw_bits &= new_val;
+
+	/* update the virt regs */
+	new_val = cpu_to_le16(new_val);
+	memcpy(cxl->config_virt + offset, &new_val, size);
+
+	if (hw_bits)
+		pci_write_config_word(pdev, offset, hw_bits);
+
+	if (initiate_cxl_reset) {
+		/* TODO: call the Linux CXL reset path */
+	}
+	return size;
+}
+
+static ssize_t cxl_status_2_write(struct vfio_cxl_core_device *cxl, void *buf,
+				  u64 offset, u64 size)
+{
+	struct pci_dev *pdev = cxl->pci_core.pdev;
+	u16 cap3 = le16_to_cpu(*(u16 *)(cxl->config_virt + cxl->dvsec + 0x38));
+	u16 new_val = le16_to_cpu(*(u16 *)buf);
+
+	if (WARN_ON_ONCE(size != 2))
+		return -EINVAL;
+
+	/* forward the RW1CS bit to the HW if supported */
+	if ((cap3 & BIT(2)) && (new_val & BIT(3)))
+		pci_write_config_word(pdev, offset, BIT(3));
+
+	/* No need to update the virt regs; CXL STATUS 2 is read from the HW */
+	return size;
+}
+
+static ssize_t cxl_lock_write(struct vfio_cxl_core_device *cxl, void *buf,
+			      u64 offset, u64 size)
+{
+	u16 cur_val = le16_to_cpu(*(u16 *)(cxl->config_virt + offset));
+	u16 new_val = le16_to_cpu(*(u16 *)buf);
+	u16 rev_mask = GENMASK(15, 1);
+
+	if (WARN_ON_ONCE(size != 2))
+		return -EINVAL;
+
+	/* LOCK can only be cleared by a conventional reset. */
+	if (cur_val & BIT(0))
+		return size;
+
+	/* handle reserved bits in the spec */
+	new_val &= ~rev_mask;
+
+	new_val = cpu_to_le16(new_val);
+	memcpy(cxl->config_virt + offset, &new_val, size);
+	return size;
+}
+
+static ssize_t cxl_base_lo_write(struct vfio_cxl_core_device *cxl, void *buf,
+				 u64 offset, u64 size)
+{
+	u32 new_val = le32_to_cpu(*(u32 *)buf);
+	u32 rev_mask = GENMASK(27, 0);
+
+	if (WARN_ON_ONCE(size != 4))
+		return -EINVAL;
+
+	/* handle reserved bits in the spec */
+	new_val &= ~rev_mask;
+
+	new_val = cpu_to_le32(new_val);
+	memcpy(cxl->config_virt + offset, &new_val, size);
+	return size;
+}
+
+static ssize_t virt_config_reg_ro_write(struct vfio_cxl_core_device *cxl, void *buf,
+					u64 offset, u64 size)
+{
+	return size;
+}
+
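+/*
+ * Register an emulation block for each register in the CXL DVSEC,
+ * followed by the mailbox registers of every DOE extended capability.
+ */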
+static int setup_config_emulation(struct vfio_cxl_core_device *cxl)
+{
+	u16 offset = 0;
+	int ret;
+
+#define ALLOC_BLOCK(offset, size, read, write) do {		\
+	ret = new_config_block(cxl, offset, size, read, write); \
+	if (ret)						\
+		return ret;					\
+	} while (0)
+
+	ALLOC_BLOCK(cxl->dvsec, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	ALLOC_BLOCK(cxl->dvsec + 0x4, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	ALLOC_BLOCK(cxl->dvsec + 0x8, 2,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	/* CXL CAPABILITY */
+	ALLOC_BLOCK(cxl->dvsec + 0xa, 2,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	/* CXL CONTROL */
+	ALLOC_BLOCK(cxl->dvsec + 0xc, 2,
+		    virt_config_reg_read,
+		    cxl_control_write);
+
+	/* CXL STATUS */
+	ALLOC_BLOCK(cxl->dvsec + 0xe, 2,
+		    virt_config_reg_read,
+		    cxl_status_write);
+
+	/* CXL CONTROL 2 */
+	ALLOC_BLOCK(cxl->dvsec + 0x10, 2,
+		    virt_config_reg_read,
+		    cxl_control_2_write);
+
+	/* CXL STATUS 2 */
+	ALLOC_BLOCK(cxl->dvsec + 0x12, 2,
+		    hw_config_reg_read,
+		    cxl_status_2_write);
+
+	/* CXL LOCK */
+	ALLOC_BLOCK(cxl->dvsec + 0x14, 2,
+		    virt_config_reg_read,
+		    cxl_lock_write);
+
+	/* CXL CAPABILITY 2 */
+	ALLOC_BLOCK(cxl->dvsec + 0x16, 2,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	/* CXL RANGE 1 SIZE HIGH & LOW */
+	ALLOC_BLOCK(cxl->dvsec + 0x18, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	ALLOC_BLOCK(cxl->dvsec + 0x1c, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	/* CXL RANGE BASE 1 HIGH */
+	ALLOC_BLOCK(cxl->dvsec + 0x20, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_write);
+
+	/* CXL RANGE BASE 1 LOW */
+	ALLOC_BLOCK(cxl->dvsec + 0x24, 4,
+		    virt_config_reg_read,
+		    cxl_base_lo_write);
+
+	/* CXL RANGE 2 SIZE HIGH & LOW */
+	ALLOC_BLOCK(cxl->dvsec + 0x28, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	ALLOC_BLOCK(cxl->dvsec + 0x2c, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	/* CXL RANGE BASE 2 HIGH */
+	ALLOC_BLOCK(cxl->dvsec + 0x30, 4,
+		    virt_config_reg_read,
+		    virt_config_reg_write);
+
+	/* CXL RANGE BASE 2 LOW */
+	ALLOC_BLOCK(cxl->dvsec + 0x34, 4,
+		    virt_config_reg_read,
+		    cxl_base_lo_write);
+
+	/* CXL CAPABILITY 3 */
+	ALLOC_BLOCK(cxl->dvsec + 0x38, 2,
+		    virt_config_reg_read,
+		    virt_config_reg_ro_write);
+
+	while ((offset = pci_find_next_ext_capability(cxl->pci_core.pdev,
+						      offset,
+						      PCI_EXT_CAP_ID_DOE))) {
+		ALLOC_BLOCK(offset + PCI_DOE_CTRL, 4,
+			    hw_config_reg_read,
+			    hw_config_reg_write);
+
+		ALLOC_BLOCK(offset + PCI_DOE_STATUS, 4,
+			    hw_config_reg_read,
+			    hw_config_reg_write);
+
+		ALLOC_BLOCK(offset + PCI_DOE_WRITE, 4,
+			    hw_config_reg_read,
+			    hw_config_reg_write);
+
+		ALLOC_BLOCK(offset + PCI_DOE_READ, 4,
+			    hw_config_reg_read,
+			    hw_config_reg_write);
+	}
+
+#undef ALLOC_BLOCK
+
+	return 0;
+}
+
 static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
 			  reg_handler_t *read, reg_handler_t *write)
 {
@@ -179,10 +507,10 @@ static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
 
 	base = hdm_reg_base(cxl);
 
-#define ALLOC_BLOCK(offset, size, read, write) do {			\
-		ret = new_mmio_block(cxl, offset, size, read, write);	\
-		if (ret)						\
-			return ret;					\
+#define ALLOC_BLOCK(offset, size, read, write) do { \
+	ret = new_mmio_block(cxl, offset, size, read, write); \
+	if (ret) \
+		return ret; \
 	} while (0)
 
 	ALLOC_BLOCK(base + 0x4, 4,
@@ -255,6 +583,10 @@ int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
 	INIT_LIST_HEAD(&cxl->config_regblocks_head);
 	INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
 
+	ret = setup_config_emulation(cxl);
+	if (ret)
+		goto err;
+
 	ret = setup_mmio_emulation(cxl);
 	if (ret)
 		goto err;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 8f02f236b5b4..4847d09e58b4 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -120,8 +120,8 @@ struct perm_bits {
 #define	NO_WRITE	0
 #define	ALL_WRITE	0xFFFFFFFFU
 
-static int vfio_user_config_read(struct pci_dev *pdev, int offset,
-				 __le32 *val, int count)
+int vfio_user_config_read(struct pci_dev *pdev, int offset,
+			  __le32 *val, int count)
 {
 	int ret = -EINVAL;
 	u32 tmp_val = 0;
@@ -150,9 +150,10 @@ static int vfio_user_config_read(struct pci_dev *pdev, int offset,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(vfio_user_config_read);
 
-static int vfio_user_config_write(struct pci_dev *pdev, int offset,
-				  __le32 val, int count)
+int vfio_user_config_write(struct pci_dev *pdev, int offset,
+			   __le32 val, int count)
 {
 	int ret = -EINVAL;
 	u32 tmp_val = le32_to_cpu(val);
@@ -171,6 +172,7 @@ static int vfio_user_config_write(struct pci_dev *pdev, int offset,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(vfio_user_config_write);
 
 static int vfio_default_config_read(struct vfio_pci_core_device *vdev, int pos,
 				    int count, struct perm_bits *perm,
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 31fd28626846..8293910e0a96 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -201,6 +201,10 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
 			       void __iomem *io, char __user *buf,
 			       loff_t off, size_t count, size_t x_start,
 			       size_t x_end, bool iswrite);
+int vfio_user_config_read(struct pci_dev *pdev, int offset,
+			  __le32 *val, int count);
+int vfio_user_config_write(struct pci_dev *pdev, int offset,
+			   __le32 val, int count);
 bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
 					 loff_t reg_start, size_t reg_cnt,
 					 loff_t *buf_offset,
-- 
2.25.1

