lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9f2d5327-6d23-4d4a-aede-f6161a59f086@intel.com>
Date: Thu, 11 Dec 2025 09:06:12 -0700
From: Dave Jiang <dave.jiang@...el.com>
To: mhonap@...dia.com, aniketa@...dia.com, ankita@...dia.com,
 alwilliamson@...dia.com, vsethi@...dia.com, jgg@...dia.com,
 mochs@...dia.com, skolothumtho@...dia.com, alejandro.lucero-palau@....com,
 dave@...olabs.net, jonathan.cameron@...wei.com, alison.schofield@...el.com,
 vishal.l.verma@...el.com, ira.weiny@...el.com, dan.j.williams@...el.com,
 jgg@...pe.ca, yishaih@...dia.com, kevin.tian@...el.com
Cc: cjia@...dia.com, kwankhede@...dia.com, targupta@...dia.com,
 zhiw@...dia.com, kjaju@...dia.com, linux-kernel@...r.kernel.org,
 linux-cxl@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a
 new VFIO device region



On 12/9/25 9:50 AM, mhonap@...dia.com wrote:
> From: Manish Honap <mhonap@...dia.com>
> 
> To directly access the device memory, a CXL region is required. Creating
> a CXL region requires configuring HDM decoders on the path to map the
> access of HPA level by level and eventually hit the DPA in the CXL
> topology.
> 
> For the userspace, e.g. QEMU, to access the CXL region, the region is
> required to be exposed via VFIO interfaces.
> 
> Introduce a new VFIO device region and region ops to expose the created
> CXL region when initializing the device in the vfio-cxl-core. Introduce a
> new sub region type for the userspace to identify a CXL region.
> 
> Co-developed-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Zhi Wang <zhiw@...dia.com>
> Signed-off-by: Manish Honap <mhonap@...dia.com>
> ---
>  drivers/vfio/pci/vfio_cxl_core.c | 122 +++++++++++++++++++++++++++++++
>  drivers/vfio/pci/vfio_pci_core.c |   3 +-
>  include/linux/vfio_pci_core.h    |   5 ++
>  include/uapi/linux/vfio.h        |   4 +
>  4 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index cf53720c0cb7..35d95de47fa8 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -231,6 +231,128 @@ void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl)
>  }
>  EXPORT_SYMBOL_GPL(vfio_cxl_core_destroy_cxl_region);
>  
> +static int vfio_cxl_region_mmap(struct vfio_pci_core_device *pci,
> +				struct vfio_pci_region *region,
> +				struct vm_area_struct *vma)
> +{
> +	struct vfio_cxl_region *cxl_region = region->data;
> +	u64 req_len, pgoff, req_start, end;
> +	int ret;
> +
> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_MMAP))
> +		return -EINVAL;
> +
> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ) &&
> +	    (vma->vm_flags & VM_READ))
> +		return -EPERM;
> +
> +	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE) &&
> +	    (vma->vm_flags & VM_WRITE))
> +		return -EPERM;
> +
> +	pgoff = vma->vm_pgoff &
> +		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> +
> +	if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) ||
> +	    check_add_overflow(PHYS_PFN(cxl_region->addr), pgoff, &req_start) ||
> +	    check_add_overflow(PFN_PHYS(pgoff), req_len, &end))
> +		return -EOVERFLOW;
> +
> +	if (end > cxl_region->size)
> +		return -EINVAL;
> +
> +	if (cxl_region->noncached)
> +		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
> +
> +	vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP |
> +		     VM_DONTEXPAND | VM_DONTDUMP);
> +
> +	ret = remap_pfn_range(vma, vma->vm_start, req_start,
> +			      req_len, vma->vm_page_prot);
> +	if (ret)
> +		return ret;
> +
> +	vma->vm_pgoff = req_start;
> +
> +	return 0;
> +}
> +
> +static ssize_t vfio_cxl_region_rw(struct vfio_pci_core_device *core_dev,
> +				  char __user *buf, size_t count, loff_t *ppos,
> +				  bool iswrite)
> +{
> +	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
> +	struct vfio_cxl_region *cxl_region = core_dev->region[i].data;
> +	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +
> +	if (!count)
> +		return 0;
> +
> +	return vfio_pci_core_do_io_rw(core_dev, false,
> +				      cxl_region->vaddr,
> +				      (char __user *)buf, pos, count,
> +				      0, 0, iswrite);
> +}
> +
> +static void vfio_cxl_region_release(struct vfio_pci_core_device *vdev,
> +				    struct vfio_pci_region *region)
> +{
> +}
> +
> +static const struct vfio_pci_regops vfio_cxl_regops = {
> +	.rw             = vfio_cxl_region_rw,
> +	.mmap           = vfio_cxl_region_mmap,
> +	.release        = vfio_cxl_region_release,
> +};
> +
> +int vfio_cxl_core_register_cxl_region(struct vfio_cxl_core_device *cxl)
> +{
> +	struct vfio_pci_core_device *pci = &cxl->pci_core;
> +	struct vfio_cxl *cxl_core = cxl->cxl_core;
> +	u32 flags;
> +	int ret;
> +
> +	if (WARN_ON(!cxl_core->region.region || cxl_core->region.vaddr))
> +		return -EEXIST;
> +
> +	cxl_core->region.vaddr = ioremap(cxl_core->region.addr, cxl_core->region.size);
> +	if (!cxl_core->region.addr)

I think you want to check cxl_core->region.vaddr here, right?

Also, what is the ioremap'd region for?

DJ

> +		return -EFAULT;
> +
> +	flags = VFIO_REGION_INFO_FLAG_READ |
> +		VFIO_REGION_INFO_FLAG_WRITE |
> +		VFIO_REGION_INFO_FLAG_MMAP;
> +
> +	ret = vfio_pci_core_register_dev_region(pci,
> +						PCI_VENDOR_ID_CXL |
> +						VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> +						VFIO_REGION_SUBTYPE_CXL,
> +						&vfio_cxl_regops,
> +						cxl_core->region.size, flags,
> +						&cxl_core->region);
> +	if (ret) {
> +		iounmap(cxl_core->region.vaddr);
> +		cxl_core->region.vaddr = NULL;
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vfio_cxl_core_register_cxl_region);
> +
> +void vfio_cxl_core_unregister_cxl_region(struct vfio_cxl_core_device *cxl)
> +{
> +	struct vfio_cxl *cxl_core = cxl->cxl_core;
> +
> +	if (WARN_ON(!cxl_core->region.region || !cxl_core->region.vaddr))
> +		return;
> +
> +	iounmap(cxl_core->region.vaddr);
> +	cxl_core->region.vaddr = NULL;
> +}
> +EXPORT_SYMBOL_GPL(vfio_cxl_core_unregister_cxl_region);
> +
>  MODULE_LICENSE("GPL");
>  MODULE_AUTHOR(DRIVER_AUTHOR);
>  MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..c0695b5db66d 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1698,12 +1698,13 @@ static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
>  	return vfio_pci_mmap_huge_fault(vmf, 0);
>  }
>  
> -static const struct vm_operations_struct vfio_pci_mmap_ops = {
> +const struct vm_operations_struct vfio_pci_mmap_ops = {
>  	.fault = vfio_pci_mmap_page_fault,
>  #ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
>  	.huge_fault = vfio_pci_mmap_huge_fault,
>  #endif
>  };
> +EXPORT_SYMBOL_GPL(vfio_pci_mmap_ops);
>  
>  int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
>  {
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index a343b91d2580..3474835f5d65 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -102,6 +102,7 @@ struct vfio_cxl_region {
>  	struct cxl_region *region;
>  	u64 size;
>  	u64 addr;
> +	void *vaddr;
>  	bool noncached;
>  };
>  
> @@ -203,6 +204,8 @@ vfio_pci_core_to_cxl(struct vfio_pci_core_device *pci)
>  	return container_of(pci, struct vfio_cxl_core_device, pci_core);
>  }
>  
> +extern const struct vm_operations_struct vfio_pci_mmap_ops;
> +
>  int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
>  			 struct vfio_cxl_dev_info *info);
>  void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl);
> @@ -210,5 +213,7 @@ void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl);
>  void vfio_cxl_core_close_device(struct vfio_device *vdev);
>  int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size);
>  void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl);
> +int vfio_cxl_core_register_cxl_region(struct vfio_cxl_core_device *cxl);
> +void vfio_cxl_core_unregister_cxl_region(struct vfio_cxl_core_device *cxl);
>  
>  #endif /* VFIO_PCI_CORE_H */
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 75100bf009ba..95be987d2ed5 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -372,6 +372,10 @@ struct vfio_region_info_cap_type {
>  /* sub-types for VFIO_REGION_TYPE_GFX */
>  #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>  
> +/* 1e98 vendor PCI sub-types */
> +/* sub-type for VFIO CXL region */
> +#define VFIO_REGION_SUBTYPE_CXL                 (1)
> +
>  /**
>   * struct vfio_region_gfx_edid - EDID region layout.
>   *


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ