[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <80c04058-833b-4056-b47c-54a3a50f5f89@intel.com>
Date: Thu, 11 Dec 2025 11:13:21 -0700
From: Dave Jiang <dave.jiang@...el.com>
To: mhonap@...dia.com, aniketa@...dia.com, ankita@...dia.com,
alwilliamson@...dia.com, vsethi@...dia.com, jgg@...dia.com,
mochs@...dia.com, skolothumtho@...dia.com, alejandro.lucero-palau@....com,
dave@...olabs.net, jonathan.cameron@...wei.com, alison.schofield@...el.com,
vishal.l.verma@...el.com, ira.weiny@...el.com, dan.j.williams@...el.com,
jgg@...pe.ca, yishaih@...dia.com, kevin.tian@...el.com
Cc: cjia@...dia.com, kwankhede@...dia.com, targupta@...dia.com,
zhiw@...dia.com, kjaju@...dia.com, linux-kernel@...r.kernel.org,
linux-cxl@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers
On 12/9/25 9:50 AM, mhonap@...dia.com wrote:
> From: Manish Honap <mhonap@...dia.com>
>
> CXL devices have HDM registers in their CXL MMIO BAR. Many HDM registers
> require a PA, and they are owned by the host in virtualization.
>
> Thus, the HDM registers need to be emulated accordingly so that the
> guest kernel CXL core can configure the virtual HDM decoders.
>
> Introduce the emulation of HDM registers to emulate the HDM decoders.
>
> Co-developed-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Manish Honap <mhonap@...dia.com>
> ---
> drivers/vfio/pci/vfio_cxl_core.c | 7 +-
> drivers/vfio/pci/vfio_cxl_core_emu.c | 242 +++++++++++++++++++++++++++
> include/linux/vfio_pci_core.h | 2 +
> 3 files changed, 248 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index cb75e9f668a7..c0bdf55997da 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -247,8 +247,6 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
> if (!dvsec)
> return -ENODEV;
>
> - cxl->dvsec = dvsec;
> -
> cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
> pdev->dev.id, dvsec, struct vfio_cxl,
> cxlds, false);
> @@ -257,9 +255,12 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
> return -ENOMEM;
> }
>
> + cxl->dvsec = dvsec;
> + cxl->cxl_core = cxl_core;
> +
> ret = find_comp_regs(cxl);
> if (ret)
> - return -ENODEV;
> + return ret;
>
> ret = setup_virt_regs(cxl);
> if (ret)
> diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
> index a0674bacecd7..6711ff8975ef 100644
> --- a/drivers/vfio/pci/vfio_cxl_core_emu.c
> +++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
> @@ -5,6 +5,239 @@
>
> #include "vfio_cxl_core_priv.h"
>
> +typedef ssize_t reg_handler_t(struct vfio_cxl_core_device *cxl, void *buf,
> + u64 offset, u64 size);
> +
> +static struct vfio_emulated_regblock *
> +new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> + reg_handler_t *read, reg_handler_t *write)
> +{
> + struct vfio_emulated_regblock *block;
> +
> + block = kzalloc(sizeof(*block), GFP_KERNEL);
> + if (!block)
> + return ERR_PTR(-ENOMEM);
> +
> + block->range.start = offset;
> + block->range.end = offset + size - 1;
> + block->read = read;
> + block->write = write;
> +
> + INIT_LIST_HEAD(&block->list);
> +
> + return block;
> +}
> +
> +static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> + reg_handler_t *read, reg_handler_t *write)
> +{
> + struct vfio_emulated_regblock *block;
> +
> + block = new_reg_block(cxl, offset, size, read, write);
> + if (IS_ERR(block))
> + return PTR_ERR(block);
> +
> + list_add_tail(&block->list, &cxl->mmio_regblocks_head);
> + return 0;
> +}
> +
> +static u64 hdm_reg_base(struct vfio_cxl_core_device *cxl)
> +{
> + return cxl->comp_reg_offset + cxl->hdm_reg_offset;
> +}
> +
> +static u64 to_hdm_reg_offset(struct vfio_cxl_core_device *cxl, u64 offset)
> +{
> + return offset - hdm_reg_base(cxl);
> +}
> +
> +static void *hdm_reg_virt(struct vfio_cxl_core_device *cxl, u64 hdm_reg_offset)
> +{
> + return cxl->comp_reg_virt + cxl->hdm_reg_offset + hdm_reg_offset;
> +}
> +
> +static ssize_t virt_hdm_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
> + u64 offset, u64 size)
> +{
> + offset = to_hdm_reg_offset(cxl, offset);
> + memcpy(buf, hdm_reg_virt(cxl, offset), size);
> +
> + return size;
> +}
> +
> +static ssize_t virt_hdm_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
> + u64 offset, u64 size)
> +{
> + offset = to_hdm_reg_offset(cxl, offset);
> + memcpy(hdm_reg_virt(cxl, offset), buf, size);
> +
> + return size;
> +}
> +
> +static ssize_t virt_hdm_rev_reg_write(struct vfio_cxl_core_device *cxl,
> + void *buf, u64 offset, u64 size)
> +{
> + /* Discard writes on reserved registers. */
> + return size;
> +}
> +
> +static ssize_t hdm_decoder_n_lo_write(struct vfio_cxl_core_device *cxl,
> + void *buf, u64 offset, u64 size)
> +{
> + u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> + if (WARN_ON_ONCE(size != 4))
> + return -EINVAL;
> +
> + /* Bit [27:0] are reserved. */
> + new_val &= ~GENMASK(27, 0);
maybe define the mask
> +
> + new_val = cpu_to_le32(new_val);
> + offset = to_hdm_reg_offset(cxl, offset);
> + memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> + return size;
> +}
> +
> +static ssize_t hdm_decoder_global_ctrl_write(struct vfio_cxl_core_device *cxl,
> + void *buf, u64 offset, u64 size)
> +{
> + u32 hdm_decoder_global_cap;
> + u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> + if (WARN_ON_ONCE(size != 4))
> + return -EINVAL;
> +
> + /* Bit [31:2] are reserved. */
> + new_val &= ~GENMASK(31, 2);
same here re mask
> +
> + /* Poison On Decode Error Enable bit is 0 and RO if not support. */
> + hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> + if (!(hdm_decoder_global_cap & BIT(10)))
> + new_val &= ~BIT(0);
Would be good to define the register bits to ease reading the code
> +
> + new_val = cpu_to_le32(new_val);
> + offset = to_hdm_reg_offset(cxl, offset);
> + memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> + return size;
> +}
> +
> +static ssize_t hdm_decoder_n_ctrl_write(struct vfio_cxl_core_device *cxl,
> + void *buf, u64 offset, u64 size)
> +{
> + u32 hdm_decoder_global_cap;
> + u32 ro_mask, rev_mask;
> + u32 new_val = le32_to_cpu(*(u32 *)buf);
> + u32 cur_val;
> +
> + if (WARN_ON_ONCE(size != 4))
> + return -EINVAL;
> +
> + offset = to_hdm_reg_offset(cxl, offset);
> + cur_val = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, offset));
> +
> + /* Lock on commit */
> + if (cur_val & BIT(8))
define bit(s). same comment for the rest of the patch.
DJ
> + return size;
> +
> + hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> +
> + /* RO and reserved bits in the spec */
> + ro_mask = BIT(10) | BIT(11);
> + rev_mask = BIT(15) | GENMASK(31, 28);
> +
> + /* bits are not valid for devices */
> + ro_mask |= BIT(12);
> + rev_mask |= GENMASK(19, 16) | GENMASK(23, 20);
> +
> + /* bits are reserved when UIO is not supported */
> + if (!(hdm_decoder_global_cap & BIT(13)))
> + rev_mask |= BIT(14) | GENMASK(27, 24);
> +
> + /* clear reserved bits */
> + new_val &= ~rev_mask;
> +
> + /* keep the RO bits */
> + cur_val &= ro_mask;
> + new_val &= ~ro_mask;
> + new_val |= cur_val;
> +
> + /* emulate HDM decoder commit/de-commit */
> + if (new_val & BIT(9))
> + new_val |= BIT(10);
> + else
> + new_val &= ~BIT(10);
> +
> + new_val = cpu_to_le32(new_val);
> + memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> + return size;
> +}
> +
> +static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
> +{
> + u64 offset, base;
> + int ret;
> +
> + base = hdm_reg_base(cxl);
> +
> +#define ALLOC_BLOCK(offset, size, read, write) do { \
> + ret = new_mmio_block(cxl, offset, size, read, write); \
> + if (ret) \
> + return ret; \
> + } while (0)
> +
> + ALLOC_BLOCK(base + 0x4, 4,
> + virt_hdm_reg_read,
> + hdm_decoder_global_ctrl_write);
> +
> + offset = base + 0x10;
> + while (offset < base + cxl->hdm_reg_size) {
> + /* HDM N BASE LOW */
> + ALLOC_BLOCK(offset, 4,
> + virt_hdm_reg_read,
> + hdm_decoder_n_lo_write);
> +
> + /* HDM N BASE HIGH */
> + ALLOC_BLOCK(offset + 0x4, 4,
> + virt_hdm_reg_read,
> + virt_hdm_reg_write);
> +
> + /* HDM N SIZE LOW */
> + ALLOC_BLOCK(offset + 0x8, 4,
> + virt_hdm_reg_read,
> + hdm_decoder_n_lo_write);
> +
> + /* HDM N SIZE HIGH */
> + ALLOC_BLOCK(offset + 0xc, 4,
> + virt_hdm_reg_read,
> + virt_hdm_reg_write);
> +
> + /* HDM N CONTROL */
> + ALLOC_BLOCK(offset + 0x10, 4,
> + virt_hdm_reg_read,
> + hdm_decoder_n_ctrl_write);
> +
> + /* HDM N TARGET LIST LOW */
> + ALLOC_BLOCK(offset + 0x14, 0x4,
> + virt_hdm_reg_read,
> + virt_hdm_rev_reg_write);
> +
> + /* HDM N TARGET LIST HIGH */
> + ALLOC_BLOCK(offset + 0x18, 0x4,
> + virt_hdm_reg_read,
> + virt_hdm_rev_reg_write);
> +
> + /* HDM N REV */
> + ALLOC_BLOCK(offset + 0x1c, 0x4,
> + virt_hdm_reg_read,
> + virt_hdm_rev_reg_write);
> +
> + offset += 0x20;
> + }
> +
> +#undef ALLOC_BLOCK
> + return 0;
> +}
> +
> void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
> {
> struct list_head *pos, *n;
> @@ -17,10 +250,19 @@ void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
>
> int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
> {
> + int ret;
> +
> INIT_LIST_HEAD(&cxl->config_regblocks_head);
> INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
>
> + ret = setup_mmio_emulation(cxl);
> + if (ret)
> + goto err;
> +
> return 0;
> +err:
> + vfio_cxl_core_clean_register_emulation(cxl);
> + return ret;
> }
>
> static struct vfio_emulated_regblock *
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index 12ded67c7db7..31fd28626846 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -251,5 +251,7 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
> size_t count, loff_t *ppos);
> long vfio_cxl_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
> unsigned long arg);
> +int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl);
> +void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl);
>
> #endif /* VFIO_PCI_CORE_H */
Powered by blists - more mailing lists