Message-ID: <aRJotYNoW5vwb5gZ@devgpu015.cco6.facebook.com>
Date: Mon, 10 Nov 2025 14:35:33 -0800
From: Alex Mastro <amastro@...com>
To: Alex Williamson <alex@...zbot.org>
CC: David Matlack <dmatlack@...gle.com>, Shuah Khan <shuah@...nel.org>,
<kvm@...r.kernel.org>, <linux-kselftest@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, Jason Gunthorpe <jgg@...pe.ca>
Subject: Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
On Mon, Nov 10, 2025 at 02:31:53PM -0700, Alex Williamson wrote:
> On Mon, 10 Nov 2025 13:10:41 -0800
> Alex Mastro <amastro@...com> wrote:
>
> > VFIO selftests need to map IOVAs from legally accessible ranges, which
> > could vary between hardware. Tests in vfio_dma_mapping_test.c are making
> > excessively strong assumptions about which IOVAs can be mapped.
> >
> > Add vfio_iommu_iova_ranges(), which queries IOVA ranges from the
> > IOMMUFD or VFIO container associated with the device. The queried ranges
> > are normalized to IOMMUFD's iommu_iova_range representation so that
> > handling of IOVA ranges up the stack can be implementation-agnostic.
> > iommu_iova_range and vfio_iova_range are equivalent, so bias to using the
> > new interface's struct.
> >
> > Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
> > Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
> > VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.
> >
> > The underlying vfio_iommu_type1_info buffer-related functionality has
> > been kept generic so the same helpers can be used to query other
> > capability chain information, if needed.
> >
> > Signed-off-by: Alex Mastro <amastro@...com>
> > ---
> > .../testing/selftests/vfio/lib/include/vfio_util.h | 8 +-
> > tools/testing/selftests/vfio/lib/vfio_pci_device.c | 161 +++++++++++++++++++++
> > 2 files changed, 168 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > index 240409bf5f8a..fb5efec52316 100644
> > --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > @@ -4,9 +4,12 @@
> >
> > #include <fcntl.h>
> > #include <string.h>
> > -#include <linux/vfio.h>
> > +
> > +#include <uapi/linux/types.h>
> > +#include <linux/iommufd.h>
> > #include <linux/list.h>
> > #include <linux/pci_regs.h>
> > +#include <linux/vfio.h>
> >
> > #include "../../../kselftest.h"
> >
> > @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
> > void vfio_pci_device_cleanup(struct vfio_pci_device *device);
> > void vfio_pci_device_reset(struct vfio_pci_device *device);
> >
> > +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> > + size_t *nranges);
> > +
> > int __vfio_pci_dma_map(struct vfio_pci_device *device,
> > struct vfio_dma_region *region);
> > int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
> > diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > index a381fd253aa7..6bedbe65f0a1 100644
> > --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > @@ -29,6 +29,167 @@
> > VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> > } while (0)
> >
> > +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> > + size_t *cap_offset)
> > +{
> > + struct vfio_info_cap_header *hdr;
> > +
> > + if (!*cap_offset)
> > + return NULL;
> > +
> > + /* Cap offset must be in bounds */
> > + VFIO_ASSERT_LT(*cap_offset, bufsz);
> > + /* There must be enough remaining space to contain the header */
> > + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> > +
> > + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> > +
> > + /* If there is a next, offset must increase by at least the header size */
> > + if (hdr->next) {
> > + VFIO_ASSERT_GT(hdr->next, *cap_offset);
> > + VFIO_ASSERT_GE(hdr->next - *cap_offset, sizeof(*hdr));
> > + }
> > +
> > + *cap_offset = hdr->next;
> > +
> > + return hdr;
> > +}
> > +
> > +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
> > + u16 cap_id)
> > +{
> > + struct vfio_info_cap_header *hdr;
> > + size_t cap_offset = buf->cap_offset;
> > +
> > + if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
> > + return NULL;
> > +
> > + if (cap_offset)
> > + VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
> > +
> > + while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
> > + if (hdr->id == cap_id)
> > + return hdr;
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +/* Return buffer including capability chain, if present. Free with free() */
> > +static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
> > +{
> > + struct vfio_iommu_type1_info *buf;
> > +
> > + buf = malloc(sizeof(*buf));
> > + VFIO_ASSERT_NOT_NULL(buf);
> > +
> > + *buf = (struct vfio_iommu_type1_info) {
> > + .argsz = sizeof(*buf),
> > + };
> > +
> > + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> > +
> > + buf = realloc(buf, buf->argsz);
> > + VFIO_ASSERT_NOT_NULL(buf);
> > +
> > + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> > +
> > + return buf;
> > +}
> > +
> > +/*
> > + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
> > + * report iommufd's iommu_iova_range. Free with free().
> > + */
> > +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> > + size_t *nranges)
> > +{
> > + struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> > + struct vfio_iommu_type1_info *buf;
> > + struct vfio_info_cap_header *hdr;
> > + struct iommu_iova_range *ranges = NULL;
> > +
> > + buf = vfio_iommu_info_buf(device);
> > + VFIO_ASSERT_NOT_NULL(buf);
> > +
> > + hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> > + if (!hdr)
> > + goto free_buf;
> > +
> > + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> > + if (!cap_range->nr_iovas)
> > + goto free_buf;
> > +
> > + ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
>
>
> Natural calloc() use case.
Ack.
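For reference, I'm thinking of a change along these lines for this
allocation site (sketch only; the later allocation gets the same
treatment):

	-	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
	+	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));

The existing VFIO_ASSERT_NOT_NULL() check stays as-is.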
>
> > + VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > + for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> > + ranges[i] = (struct iommu_iova_range){
> > + .start = cap_range->iova_ranges[i].start,
> > + .last = cap_range->iova_ranges[i].end,
> > + };
> > + }
> > +
> > + *nranges = cap_range->nr_iovas;
> > +
> > +free_buf:
> > + free(buf);
> > + return ranges;
> > +}
> > +
> > +/* Return iova ranges of the device's IOAS. Free with free() */
> > +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> > + size_t *nranges)
> > +{
> > + struct iommu_iova_range *ranges;
> > + int ret;
> > +
> > + struct iommu_ioas_iova_ranges query = {
> > + .size = sizeof(query),
> > + .ioas_id = device->ioas_id,
> > + };
> > +
> > + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > + VFIO_ASSERT_EQ(ret, -1);
> > + VFIO_ASSERT_EQ(errno, EMSGSIZE);
> > + VFIO_ASSERT_GT(query.num_iovas, 0);
> > +
> > + ranges = malloc(query.num_iovas * sizeof(*ranges));
>
> Same.
Ack.
>
> > + VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > + query.allowed_iovas = (uintptr_t)ranges;
> > +
> > + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > + *nranges = query.num_iovas;
> > +
> > + return ranges;
> > +}
> > +
> > +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> > + size_t *nranges)
> > +{
> > + struct iommu_iova_range *ranges;
> > +
> > + if (device->iommufd)
> > + ranges = iommufd_iova_ranges(device, nranges);
> > + else
> > + ranges = vfio_iommu_iova_ranges(device, nranges);
> > +
> > + if (!ranges)
> > + return NULL;
> > +
> > + /* ranges should be valid, ascending, and non-overlapping */
>
> I don't recall that ranges are required to be in any particular order.
Yes, this is assuming more than the UAPI guarantees. I'll update this to
sort what the kernel vends so that we can preserve the sanity checks.
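Roughly something like this untested sketch, sorting in place before the
sanity checks in vfio_pci_iova_ranges() (comparator name is arbitrary,
exact shape may change in v2):

	static int iova_range_cmp(const void *a, const void *b)
	{
		const struct iommu_iova_range *ra = a, *rb = b;

		if (ra->start < rb->start)
			return -1;
		if (ra->start > rb->start)
			return 1;
		return 0;
	}

	/* Sort so the ascending/non-overlapping checks below hold */
	qsort(ranges, *nranges, sizeof(*ranges), iova_range_cmp);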
> Thanks,
>
> Alex
>
> > + VFIO_ASSERT_GT(*nranges, 0);
> > + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
> > +
> > + for (size_t i = 1; i < *nranges; i++) {
> > + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
> > + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
> > + }
> > +
> > + return ranges;
> > +}
> > +
> > iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
> > {
> > struct vfio_dma_region *region;
> >
>