[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251017234048.GA344394@nvidia.com>
Date: Fri, 17 Oct 2025 20:40:48 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: Leon Romanovsky <leon@...nel.org>
Cc: Alex Williamson <alex.williamson@...hat.com>,
Leon Romanovsky <leonro@...dia.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Christian König <christian.koenig@....com>,
dri-devel@...ts.freedesktop.org, iommu@...ts.linux.dev,
Jens Axboe <axboe@...nel.dk>, Joerg Roedel <joro@...tes.org>,
kvm@...r.kernel.org, linaro-mm-sig@...ts.linaro.org,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-media@...r.kernel.org, linux-mm@...ck.org,
linux-pci@...r.kernel.org, Logan Gunthorpe <logang@...tatee.com>,
Marek Szyprowski <m.szyprowski@...sung.com>,
Robin Murphy <robin.murphy@....com>,
Sumit Semwal <sumit.semwal@...aro.org>,
Vivek Kasireddy <vivek.kasireddy@...el.com>,
Will Deacon <will@...nel.org>
Subject: Re: [PATCH v5 9/9] vfio/pci: Add dma-buf export support for MMIO
regions
On Mon, Oct 13, 2025 at 06:26:11PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@...dia.com>
>
> Add support for exporting PCI device MMIO regions through dma-buf,
> enabling safe sharing of non-struct page memory with controlled
> lifetime management. This allows RDMA and other subsystems to import
> dma-buf FDs and build them into memory regions for PCI P2P operations.
I was looking at how to address Alex's note about not all drivers
being compatible, and how to enable the non-compatible drivers.
It looks like the simplest thing is to make dma_ranges_to_p2p_phys
into an op and have the driver provide it. If it is not provided, then
there is no support.
Drivers with special needs can fill in phys in their own way and get
their own provider.
Sort of like this:
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index ac10f14417f2f3..6d41cf26b53994 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -147,6 +147,10 @@ static const struct vfio_device_ops vfio_pci_ops = {
.pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas,
};
+static const struct vfio_pci_device_ops vfio_pci_dev_ops = {
+ .get_dmabuf_phys = vfio_pci_core_get_dmabuf_phys,
+};
+
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_core_device *vdev;
@@ -161,6 +165,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return PTR_ERR(vdev);
dev_set_drvdata(&pdev->dev, vdev);
+ vdev->pci_ops = &vfio_pci_dev_ops;
ret = vfio_pci_core_register_device(vdev);
if (ret)
goto out_put_vdev;
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 358856e6b8a820..dad880781a9352 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -309,47 +309,52 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
}
EXPORT_SYMBOL_GPL(vfio_pci_dma_buf_iommufd_map);
-static int dma_ranges_to_p2p_phys(struct vfio_pci_dma_buf *priv,
- struct vfio_device_feature_dma_buf *dma_buf,
+int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
- struct p2pdma_provider *provider)
+ size_t nr_ranges)
{
- struct pci_dev *pdev = priv->vdev->pdev;
- phys_addr_t len = pci_resource_len(pdev, dma_buf->region_index);
+ struct pci_dev *pdev = vdev->pdev;
+ phys_addr_t len = pci_resource_len(pdev, region_index);
phys_addr_t pci_start;
phys_addr_t pci_last;
u32 i;
if (!len)
return -EINVAL;
- pci_start = pci_resource_start(pdev, dma_buf->region_index);
+
+ *provider = pcim_p2pdma_provider(pdev, region_index);
+ if (!*provider)
+ return -EINVAL;
+
+ pci_start = pci_resource_start(pdev, region_index);
pci_last = pci_start + len - 1;
- for (i = 0; i < dma_buf->nr_ranges; i++) {
+ for (i = 0; i < nr_ranges; i++) {
phys_addr_t last;
if (!dma_ranges[i].length)
return -EINVAL;
if (check_add_overflow(pci_start, dma_ranges[i].offset,
- &priv->phys_vec[i].paddr) ||
- check_add_overflow(priv->phys_vec[i].paddr,
+ &phys_vec[i].paddr) ||
+ check_add_overflow(phys_vec[i].paddr,
dma_ranges[i].length - 1, &last))
return -EOVERFLOW;
if (last > pci_last)
return -EINVAL;
- priv->phys_vec[i].len = dma_ranges[i].length;
- priv->size += priv->phys_vec[i].len;
+ phys_vec[i].len = dma_ranges[i].length;
}
- priv->nr_ranges = dma_buf->nr_ranges;
- priv->provider = provider;
return 0;
}
+EXPORT_SYMBOL_GPL(vfio_pci_core_get_dmabuf_phys);
static int validate_dmabuf_input(struct vfio_pci_core_device *vdev,
struct vfio_device_feature_dma_buf *dma_buf,
struct vfio_region_dma_range *dma_ranges,
- struct p2pdma_provider **provider)
+ size_t *lengthp)
{
struct pci_dev *pdev = vdev->pdev;
u32 bar = dma_buf->region_index;
@@ -365,10 +370,6 @@ static int validate_dmabuf_input(struct vfio_pci_core_device *vdev,
if (bar >= VFIO_PCI_ROM_REGION_INDEX)
return -ENODEV;
- *provider = pcim_p2pdma_provider(pdev, bar);
- if (!*provider)
- return -EINVAL;
-
bar_size = pci_resource_len(pdev, bar);
for (i = 0; i < dma_buf->nr_ranges; i++) {
u64 offset = dma_ranges[i].offset;
@@ -397,6 +398,7 @@ static int validate_dmabuf_input(struct vfio_pci_core_device *vdev,
if (overflows_type(length, size_t) || length & DMA_IOVA_USE_SWIOTLB)
return -EINVAL;
+ *lengthp = length;
return 0;
}
@@ -407,10 +409,13 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
struct vfio_device_feature_dma_buf get_dma_buf = {};
struct vfio_region_dma_range *dma_ranges;
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
- struct p2pdma_provider *provider;
struct vfio_pci_dma_buf *priv;
+ size_t length;
int ret;
+ if (!vdev->pci_ops->get_dmabuf_phys)
+ return -EOPNOTSUPP;
+
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
sizeof(get_dma_buf));
if (ret != 1)
@@ -427,7 +432,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
if (IS_ERR(dma_ranges))
return PTR_ERR(dma_ranges);
- ret = validate_dmabuf_input(vdev, &get_dma_buf, dma_ranges, &provider);
+ ret = validate_dmabuf_input(vdev, &get_dma_buf, dma_ranges, &length);
if (ret)
goto err_free_ranges;
@@ -444,10 +449,16 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
}
priv->vdev = vdev;
- ret = dma_ranges_to_p2p_phys(priv, &get_dma_buf, dma_ranges, provider);
+ priv->nr_ranges = get_dma_buf.nr_ranges;
+ priv->size = length;
+ ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
+ get_dma_buf.region_index,
+ priv->phys_vec, dma_ranges,
+ priv->nr_ranges);
if (ret)
goto err_free_phys;
+
kfree(dma_ranges);
dma_ranges = NULL;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 37ce02e30c7632..4ea2095381eb24 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -26,6 +26,7 @@
struct vfio_pci_core_device;
struct vfio_pci_region;
+struct p2pdma_provider;
struct vfio_pci_regops {
ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf,
@@ -49,9 +50,26 @@ struct vfio_pci_region {
u32 flags;
};
+struct vfio_pci_device_ops {
+ int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges);
+};
+
+int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges);
+
struct vfio_pci_core_device {
struct vfio_device vdev;
struct pci_dev *pdev;
+ const struct vfio_pci_device_ops *pci_ops;
void __iomem *barmap[PCI_STD_NUM_BARS];
bool bar_mmap_supported[PCI_STD_NUM_BARS];
u8 *pci_config_map;
Powered by blists - more mailing lists