lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <80c04058-833b-4056-b47c-54a3a50f5f89@intel.com>
Date: Thu, 11 Dec 2025 11:13:21 -0700
From: Dave Jiang <dave.jiang@...el.com>
To: mhonap@...dia.com, aniketa@...dia.com, ankita@...dia.com,
 alwilliamson@...dia.com, vsethi@...dia.com, jgg@...dia.com,
 mochs@...dia.com, skolothumtho@...dia.com, alejandro.lucero-palau@....com,
 dave@...olabs.net, jonathan.cameron@...wei.com, alison.schofield@...el.com,
 vishal.l.verma@...el.com, ira.weiny@...el.com, dan.j.williams@...el.com,
 jgg@...pe.ca, yishaih@...dia.com, kevin.tian@...el.com
Cc: cjia@...dia.com, kwankhede@...dia.com, targupta@...dia.com,
 zhiw@...dia.com, kjaju@...dia.com, linux-kernel@...r.kernel.org,
 linux-cxl@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers



On 12/9/25 9:50 AM, mhonap@...dia.com wrote:
> From: Manish Honap <mhonap@...dia.com>
> 
> CXL devices have HDM registers in their CXL MMIO BAR. Many HDM registers
> require a PA and they are owned by the host in virtualization.
> 
> Thus, the HDM registers need to be emulated accordingly so that the
> guest kernel CXL core can configure the virtual HDM decoders.
> 
> Introduce the emulation of HDM registers that emulates the HDM decoders.
> 
> Co-developed-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Manish Honap <mhonap@...dia.com>
> ---
>  drivers/vfio/pci/vfio_cxl_core.c     |   7 +-
>  drivers/vfio/pci/vfio_cxl_core_emu.c | 242 +++++++++++++++++++++++++++
>  include/linux/vfio_pci_core.h        |   2 +
>  3 files changed, 248 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index cb75e9f668a7..c0bdf55997da 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -247,8 +247,6 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
>  	if (!dvsec)
>  		return -ENODEV;
>  
> -	cxl->dvsec = dvsec;
> -
>  	cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
>  					     pdev->dev.id, dvsec, struct vfio_cxl,
>  					     cxlds, false);
> @@ -257,9 +255,12 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
>  		return -ENOMEM;
>  	}
>  
> +	cxl->dvsec = dvsec;
> +	cxl->cxl_core = cxl_core;
> +
>  	ret = find_comp_regs(cxl);
>  	if (ret)
> -		return -ENODEV;
> +		return ret;
>  
>  	ret = setup_virt_regs(cxl);
>  	if (ret)
> diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
> index a0674bacecd7..6711ff8975ef 100644
> --- a/drivers/vfio/pci/vfio_cxl_core_emu.c
> +++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
> @@ -5,6 +5,239 @@
>  
>  #include "vfio_cxl_core_priv.h"
>  
> +typedef ssize_t reg_handler_t(struct vfio_cxl_core_device *cxl, void *buf,
> +			      u64 offset, u64 size);
> +
> +static struct vfio_emulated_regblock *
> +new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> +	      reg_handler_t *read, reg_handler_t *write)
> +{
> +	struct vfio_emulated_regblock *block;
> +
> +	block = kzalloc(sizeof(*block), GFP_KERNEL);
> +	if (!block)
> +		return ERR_PTR(-ENOMEM);
> +
> +	block->range.start = offset;
> +	block->range.end = offset + size - 1;
> +	block->read = read;
> +	block->write = write;
> +
> +	INIT_LIST_HEAD(&block->list);
> +
> +	return block;
> +}
> +
> +static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> +			  reg_handler_t *read, reg_handler_t *write)
> +{
> +	struct vfio_emulated_regblock *block;
> +
> +	block = new_reg_block(cxl, offset, size, read, write);
> +	if (IS_ERR(block))
> +		return PTR_ERR(block);
> +
> +	list_add_tail(&block->list, &cxl->mmio_regblocks_head);
> +	return 0;
> +}
> +
> +static u64 hdm_reg_base(struct vfio_cxl_core_device *cxl)
> +{
> +	return cxl->comp_reg_offset + cxl->hdm_reg_offset;
> +}
> +
> +static u64 to_hdm_reg_offset(struct vfio_cxl_core_device *cxl, u64 offset)
> +{
> +	return offset - hdm_reg_base(cxl);
> +}
> +
> +static void *hdm_reg_virt(struct vfio_cxl_core_device *cxl, u64 hdm_reg_offset)
> +{
> +	return cxl->comp_reg_virt + cxl->hdm_reg_offset + hdm_reg_offset;
> +}
> +
> +static ssize_t virt_hdm_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
> +				 u64 offset, u64 size)
> +{
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(buf, hdm_reg_virt(cxl, offset), size);
> +
> +	return size;
> +}
> +
> +static ssize_t virt_hdm_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
> +				  u64 offset, u64 size)
> +{
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), buf, size);
> +
> +	return size;
> +}
> +
> +static ssize_t virt_hdm_rev_reg_write(struct vfio_cxl_core_device *cxl,
> +				      void *buf, u64 offset, u64 size)
> +{
> +	/* Discard writes on reserved registers. */
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_n_lo_write(struct vfio_cxl_core_device *cxl,
> +				      void *buf, u64 offset, u64 size)
> +{
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	/* Bit [27:0] are reserved. */
> +	new_val &= ~GENMASK(27, 0);

maybe define the mask

> +
> +	new_val = cpu_to_le32(new_val);
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_global_ctrl_write(struct vfio_cxl_core_device *cxl,
> +					     void *buf, u64 offset, u64 size)
> +{
> +	u32 hdm_decoder_global_cap;
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	/* Bit [31:2] are reserved. */
> +	new_val &= ~GENMASK(31, 2);

same here re mask

> +
> +	/* Poison On Decode Error Enable bit is 0 and RO if not support. */
> +	hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> +	if (!(hdm_decoder_global_cap & BIT(10)))
> +		new_val &= ~BIT(0);

Would be good to define the register bits to ease reading the code

> +
> +	new_val = cpu_to_le32(new_val);
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_n_ctrl_write(struct vfio_cxl_core_device *cxl,
> +					void *buf, u64 offset, u64 size)
> +{
> +	u32 hdm_decoder_global_cap;
> +	u32 ro_mask, rev_mask;
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +	u32 cur_val;
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	cur_val = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, offset));
> +
> +	/* Lock on commit */
> +	if (cur_val & BIT(8))

define bit(s). same comment for the rest of the patch.

DJ

> +		return size;
> +
> +	hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> +
> +	/* RO and reserved bits in the spec */
> +	ro_mask = BIT(10) | BIT(11);
> +	rev_mask = BIT(15) | GENMASK(31, 28);
> +
> +	/* bits are not valid for devices */
> +	ro_mask |= BIT(12);
> +	rev_mask |= GENMASK(19, 16) | GENMASK(23, 20);
> +
> +	/* bits are reserved when UIO is not supported */
> +	if (!(hdm_decoder_global_cap & BIT(13)))
> +		rev_mask |= BIT(14) | GENMASK(27, 24);
> +
> +	/* clear reserved bits */
> +	new_val &= ~rev_mask;
> +
> +	/* keep the RO bits */
> +	cur_val &= ro_mask;
> +	new_val &= ~ro_mask;
> +	new_val |= cur_val;
> +
> +	/* emulate HDM decoder commit/de-commit */
> +	if (new_val & BIT(9))
> +		new_val |= BIT(10);
> +	else
> +		new_val &= ~BIT(10);
> +
> +	new_val = cpu_to_le32(new_val);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
> +{
> +	u64 offset, base;
> +	int ret;
> +
> +	base = hdm_reg_base(cxl);
> +
> +#define ALLOC_BLOCK(offset, size, read, write) do {			\
> +		ret = new_mmio_block(cxl, offset, size, read, write);	\
> +		if (ret)						\
> +			return ret;					\
> +	} while (0)
> +
> +	ALLOC_BLOCK(base + 0x4, 4,
> +		    virt_hdm_reg_read,
> +		    hdm_decoder_global_ctrl_write);
> +
> +	offset = base + 0x10;
> +	while (offset < base + cxl->hdm_reg_size) {
> +		/* HDM N BASE LOW */
> +		ALLOC_BLOCK(offset, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_lo_write);
> +
> +		/* HDM N BASE HIGH */
> +		ALLOC_BLOCK(offset + 0x4, 4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_reg_write);
> +
> +		/* HDM N SIZE LOW */
> +		ALLOC_BLOCK(offset + 0x8, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_lo_write);
> +
> +		/* HDM N SIZE HIGH */
> +		ALLOC_BLOCK(offset + 0xc, 4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_reg_write);
> +
> +		/* HDM N CONTROL */
> +		ALLOC_BLOCK(offset + 0x10, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_ctrl_write);
> +
> +		/* HDM N TARGET LIST LOW */
> +		ALLOC_BLOCK(offset + 0x14, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		/* HDM N TARGET LIST HIGH */
> +		ALLOC_BLOCK(offset + 0x18, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		/* HDM N REV */
> +		ALLOC_BLOCK(offset + 0x1c, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		offset += 0x20;
> +	}
> +
> +#undef ALLOC_BLOCK
> +	return 0;
> +}
> +
>  void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
>  {
>  	struct list_head *pos, *n;
> @@ -17,10 +250,19 @@ void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
>  
>  int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
>  {
> +	int ret;
> +
>  	INIT_LIST_HEAD(&cxl->config_regblocks_head);
>  	INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
>  
> +	ret = setup_mmio_emulation(cxl);
> +	if (ret)
> +		goto err;
> +
>  	return 0;
> +err:
> +	vfio_cxl_core_clean_register_emulation(cxl);
> +	return ret;
>  }
>  
>  static struct vfio_emulated_regblock *
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index 12ded67c7db7..31fd28626846 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -251,5 +251,7 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
>  			    size_t count, loff_t *ppos);
>  long vfio_cxl_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
>  			 unsigned long arg);
> +int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl);
> +void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl);
>  
>  #endif /* VFIO_PCI_CORE_H */


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ