Message-ID: <20251209165019.2643142-12-mhonap@nvidia.com>
Date: Tue, 9 Dec 2025 22:20:15 +0530
From: <mhonap@...dia.com>
To: <aniketa@...dia.com>, <ankita@...dia.com>, <alwilliamson@...dia.com>,
<vsethi@...dia.com>, <jgg@...dia.com>, <mochs@...dia.com>,
<skolothumtho@...dia.com>, <alejandro.lucero-palau@....com>,
<dave@...olabs.net>, <jonathan.cameron@...wei.com>, <dave.jiang@...el.com>,
<alison.schofield@...el.com>, <vishal.l.verma@...el.com>,
<ira.weiny@...el.com>, <dan.j.williams@...el.com>, <jgg@...pe.ca>,
<yishaih@...dia.com>, <kevin.tian@...el.com>
CC: <cjia@...dia.com>, <kwankhede@...dia.com>, <targupta@...dia.com>,
<zhiw@...dia.com>, <kjaju@...dia.com>, <linux-kernel@...r.kernel.org>,
<linux-cxl@...r.kernel.org>, <kvm@...r.kernel.org>, <mhonap@...dia.com>
Subject: [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers
From: Manish Honap <mhonap@...dia.com>
CXL devices expose HDM registers in their CXL MMIO BAR. Many HDM
registers take a physical address (PA), and those registers are owned
by the host in virtualization. Thus, the HDM registers need to be
emulated so that the guest kernel CXL core can configure the virtual
HDM decoders.

Introduce the emulation of the HDM registers, which implements the
virtual HDM decoders.
Co-developed-by: Zhi Wang <zhiw@...dia.com>
Signed-off-by: Zhi Wang <zhiw@...dia.com>
Signed-off-by: Manish Honap <mhonap@...dia.com>
---
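A minimal self-contained userspace model of the commit/de-commit
emulation in hdm_decoder_n_ctrl_write(), for reviewers (illustrative
only; the emulate_ctrl_write() helper is hypothetical and not part of
this patch). It shows the guest-visible behavior: setting Commit
(bit 9) reports Committed (bit 10) immediately, since no physical
decoder is programmed, and once Lock On Commit (bit 8) is latched,
later writes are dropped:

#include <stdint.h>
#include <stdio.h>

static uint32_t emulate_ctrl_write(uint32_t cur_val, uint32_t new_val)
{
	/* Lock On Commit (bit 8): drop the write entirely. */
	if (cur_val & (1u << 8))
		return cur_val;

	/* Commit (bit 9) set: report Committed (bit 10) at once. */
	if (new_val & (1u << 9))
		new_val |= 1u << 10;
	else
		new_val &= ~(1u << 10);

	return new_val;
}

int main(void)
{
	uint32_t ctrl = 0;

	ctrl = emulate_ctrl_write(ctrl, 1u << 9);
	printf("Committed: %u\n", !!(ctrl & (1u << 10))); /* prints 1 */
	return 0;
}

The real handler additionally masks the RO and reserved bits before
updating the virtual register backing store.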
drivers/vfio/pci/vfio_cxl_core.c | 7 +-
drivers/vfio/pci/vfio_cxl_core_emu.c | 242 +++++++++++++++++++++++++++
include/linux/vfio_pci_core.h | 2 +
3 files changed, 248 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index cb75e9f668a7..c0bdf55997da 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -247,8 +247,6 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
if (!dvsec)
return -ENODEV;
- cxl->dvsec = dvsec;
-
cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
pdev->dev.id, dvsec, struct vfio_cxl,
cxlds, false);
@@ -257,9 +255,12 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
return -ENOMEM;
}
+ cxl->dvsec = dvsec;
+ cxl->cxl_core = cxl_core;
+
ret = find_comp_regs(cxl);
if (ret)
- return -ENODEV;
+ return ret;
ret = setup_virt_regs(cxl);
if (ret)
diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
index a0674bacecd7..6711ff8975ef 100644
--- a/drivers/vfio/pci/vfio_cxl_core_emu.c
+++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
@@ -5,6 +5,239 @@
#include "vfio_cxl_core_priv.h"
+typedef ssize_t reg_handler_t(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size);
+
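+/* Allocate an emulated register block covering [offset, offset + size - 1]. */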
+static struct vfio_emulated_regblock *
+new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
+ reg_handler_t *read, reg_handler_t *write)
+{
+ struct vfio_emulated_regblock *block;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return ERR_PTR(-ENOMEM);
+
+ block->range.start = offset;
+ block->range.end = offset + size - 1;
+ block->read = read;
+ block->write = write;
+
+ INIT_LIST_HEAD(&block->list);
+
+ return block;
+}
+
+static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
+ reg_handler_t *read, reg_handler_t *write)
+{
+ struct vfio_emulated_regblock *block;
+
+ block = new_reg_block(cxl, offset, size, read, write);
+ if (IS_ERR(block))
+ return PTR_ERR(block);
+
+ list_add_tail(&block->list, &cxl->mmio_regblocks_head);
+ return 0;
+}
+
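+/* Helpers to locate the HDM decoder registers within the component registers. */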
+static u64 hdm_reg_base(struct vfio_cxl_core_device *cxl)
+{
+ return cxl->comp_reg_offset + cxl->hdm_reg_offset;
+}
+
+static u64 to_hdm_reg_offset(struct vfio_cxl_core_device *cxl, u64 offset)
+{
+ return offset - hdm_reg_base(cxl);
+}
+
+static void *hdm_reg_virt(struct vfio_cxl_core_device *cxl, u64 hdm_reg_offset)
+{
+ return cxl->comp_reg_virt + cxl->hdm_reg_offset + hdm_reg_offset;
+}
+
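+/* Default handlers: pass accesses straight through to the virtual registers. */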
+static ssize_t virt_hdm_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size)
+{
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(buf, hdm_reg_virt(cxl, offset), size);
+
+ return size;
+}
+
+static ssize_t virt_hdm_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size)
+{
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), buf, size);
+
+ return size;
+}
+
+static ssize_t virt_hdm_rev_reg_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ /* Discard writes on reserved registers. */
+ return size;
+}
+
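+/* HDM Decoder n Base/Size Low registers: only bits [31:28] are writable. */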
+static ssize_t hdm_decoder_n_lo_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 new_val;
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ new_val = le32_to_cpu(*(u32 *)buf);
+
+ /* Bits [27:0] are reserved. */
+ new_val &= ~GENMASK(27, 0);
+
+ new_val = cpu_to_le32(new_val);
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
+static ssize_t hdm_decoder_global_ctrl_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 hdm_decoder_global_cap;
+ u32 new_val;
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ new_val = le32_to_cpu(*(u32 *)buf);
+
+ /* Bits [31:2] are reserved. */
+ new_val &= ~GENMASK(31, 2);
+
+ /* Poison On Decode Error Enable is 0 and RO if not supported. */
+ hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
+ if (!(hdm_decoder_global_cap & BIT(10)))
+ new_val &= ~BIT(0);
+
+ new_val = cpu_to_le32(new_val);
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
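+/* HDM Decoder n Control: mask RO/reserved bits and emulate commit. */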
+static ssize_t hdm_decoder_n_ctrl_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 hdm_decoder_global_cap;
+ u32 ro_mask, rev_mask;
+ u32 new_val, cur_val;
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ new_val = le32_to_cpu(*(u32 *)buf);
+
+ offset = to_hdm_reg_offset(cxl, offset);
+ cur_val = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, offset));
+
+ /* Lock on commit */
+ if (cur_val & BIT(8))
+ return size;
+
+ hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
+
+ /* RO and reserved bits in the spec */
+ ro_mask = BIT(10) | BIT(11);
+ rev_mask = BIT(15) | GENMASK(31, 28);
+
+ /* These bits are not valid for devices. */
+ ro_mask |= BIT(12);
+ rev_mask |= GENMASK(19, 16) | GENMASK(23, 20);
+
+ /* These bits are reserved when UIO is not supported. */
+ if (!(hdm_decoder_global_cap & BIT(13)))
+ rev_mask |= BIT(14) | GENMASK(27, 24);
+
+ /* clear reserved bits */
+ new_val &= ~rev_mask;
+
+ /* keep the RO bits */
+ cur_val &= ro_mask;
+ new_val &= ~ro_mask;
+ new_val |= cur_val;
+
+ /* emulate HDM decoder commit/de-commit */
+ if (new_val & BIT(9))
+ new_val |= BIT(10);
+ else
+ new_val &= ~BIT(10);
+
+ new_val = cpu_to_le32(new_val);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
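+/*
+ * Register emulation blocks for the HDM Decoder Global Control register
+ * and for each decoder's 0x20-byte register set.
+ */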
+static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
+{
+ u64 offset, base;
+ int ret;
+
+ base = hdm_reg_base(cxl);
+
+#define ALLOC_BLOCK(offset, size, read, write) do { \
+ ret = new_mmio_block(cxl, offset, size, read, write); \
+ if (ret) \
+ return ret; \
+ } while (0)
+
+ ALLOC_BLOCK(base + 0x4, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_global_ctrl_write);
+
+ offset = base + 0x10;
+ while (offset < base + cxl->hdm_reg_size) {
+ /* HDM N BASE LOW */
+ ALLOC_BLOCK(offset, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_lo_write);
+
+ /* HDM N BASE HIGH */
+ ALLOC_BLOCK(offset + 0x4, 4,
+ virt_hdm_reg_read,
+ virt_hdm_reg_write);
+
+ /* HDM N SIZE LOW */
+ ALLOC_BLOCK(offset + 0x8, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_lo_write);
+
+ /* HDM N SIZE HIGH */
+ ALLOC_BLOCK(offset + 0xc, 4,
+ virt_hdm_reg_read,
+ virt_hdm_reg_write);
+
+ /* HDM N CONTROL */
+ ALLOC_BLOCK(offset + 0x10, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_ctrl_write);
+
+ /* HDM N TARGET LIST LOW */
+ ALLOC_BLOCK(offset + 0x14, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ /* HDM N TARGET LIST HIGH */
+ ALLOC_BLOCK(offset + 0x18, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ /* HDM N REV */
+ ALLOC_BLOCK(offset + 0x1c, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ offset += 0x20;
+ }
+
+#undef ALLOC_BLOCK
+ return 0;
+}
+
void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
{
struct list_head *pos, *n;
@@ -17,10 +250,19 @@ void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
{
+ int ret;
+
INIT_LIST_HEAD(&cxl->config_regblocks_head);
INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
+ ret = setup_mmio_emulation(cxl);
+ if (ret)
+ goto err;
+
return 0;
+err:
+ vfio_cxl_core_clean_register_emulation(cxl);
+ return ret;
}
static struct vfio_emulated_regblock *
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 12ded67c7db7..31fd28626846 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -251,5 +251,7 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
size_t count, loff_t *ppos);
long vfio_cxl_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg);
+int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl);
#endif /* VFIO_PCI_CORE_H */
--
2.25.1