lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 7 Dec 2017 13:55:33 +0100
From:   Pierre Morel <pmorel@...ux.vnet.ibm.com>
To:     Shameerali Kolothum Thodi <shameerali.kolothum.thodi@...wei.com>,
        "alex.williamson@...hat.com" <alex.williamson@...hat.com>,
        "eric.auger@...hat.com" <eric.auger@...hat.com>
Cc:     "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Linuxarm <linuxarm@...wei.com>
Subject: Re: [RFC] vfio/type1: Add IOVA_RANGE capability support

On 06/12/2017 17:15, Shameerali Kolothum Thodi wrote:
> Hi Pierre,
> 
>> -----Original Message-----
>> From: Shameerali Kolothum Thodi
>> Sent: Wednesday, December 06, 2017 4:08 PM
>> To: alex.williamson@...hat.com; eric.auger@...hat.com;
>> pmorel@...ux.vnet.ibm.com
>> Cc: kvm@...r.kernel.org; linux-kernel@...r.kernel.org; Linuxarm
>> <linuxarm@...wei.com>; Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@...wei.com>
>> Subject: [RFC] vfio/type1: Add IOVA_RANGE capability support
>>
>> This patch allows the user-space to retrieve the supported
>> IOVA range(s), excluding any reserved regions. The implementation
>> is based on capability chains, added to the VFIO_IOMMU_GET_INFO ioctl.
>>
>> This is following the discussions here[1] and is based on the RFC patch[2].
>>
>> ToDo:
>>   - This currently derives the default supported iova range from the first
>>     iommu domain. This needs to be changed to go through the domain_list
>>     instead.
>>   - Sync with Pierre's patch[3].
> 
> Thanks to Eric[1], came to know that you have posted a patch to retrieve the
> iommu aperture info. This RFC does a similar thing but try to take care of
> any reserved regions and adds to the capability chain.
> 
> Please take a look and if there is a possibility to sync up your next revision
> and this, please let me know.

Hi Shameer,

Indeed it is close to what I was developing.

I have a single concern, the aperture is strongly hardware related while 
reserved areas are much more flexible.

As a consequence, I think it would be better if they were handled in 
separate capabilities and let the user space decide if it wants to know 
about one or the other.

For my immediate needs, this patch would be OK since we (s390x) do not 
use reserved regions.

@Alex: what do you prefer
If we need two capabilities, I will send the patch serie I made on the 
aperture capability for VFIO IOMMU.
If not I will use Shameer's patch.

Thanks,

Best regards,

Pierre

> 
> Thanks,
> Shameer
> 
> 1. https://patchwork.kernel.org/patch/10056967/
> 
>> 1.https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html
>> 2.https://lists.linuxfoundation.org/pipermail/iommu/2016-
>> November/019002.html
>> 3.https://patchwork.kernel.org/patch/10084655/
>>
>> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@...wei.com>
>> ---
>>   drivers/vfio/vfio_iommu_type1.c | 172
>> +++++++++++++++++++++++++++++++++++++++-
>>   include/uapi/linux/vfio.h       |  13 +++
>>   2 files changed, 184 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index e30e29a..72ca78a 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -28,6 +28,7 @@
>>   #include <linux/device.h>
>>   #include <linux/fs.h>
>>   #include <linux/iommu.h>
>> +#include <linux/list_sort.h>
>>   #include <linux/module.h>
>>   #include <linux/mm.h>
>>   #include <linux/rbtree.h>
>> @@ -92,6 +93,12 @@ struct vfio_group {
>>   	struct list_head	next;
>>   };
>>
>> +struct vfio_iommu_iova {
>> +	struct list_head	list;
>> +	phys_addr_t		start;
>> +	phys_addr_t		end;
>> +};
>> +
>>   /*
>>    * Guest RAM pinning working set or DMA target
>>    */
>> @@ -1537,6 +1544,144 @@ static int vfio_domains_have_iommu_cache(struct
>> vfio_iommu *iommu)
>>   	return ret;
>>   }
>>
>> +static int vfio_add_iova_cap(struct vfio_info_cap *caps, u64 start, u64 end)
>> +{
>> +	struct vfio_iommu_type1_info_cap_iova_range *cap;
>> +	struct vfio_info_cap_header *header;
>> +
>> +	header = vfio_info_cap_add(caps, sizeof(*cap),
>> +			VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
>> +	if (IS_ERR(header))
>> +		return PTR_ERR(header);
>> +
>> +	cap = container_of(header,
>> +			   struct vfio_iommu_type1_info_cap_iova_range,
>> +			   header);
>> +
>> +	cap->start = start;
>> +	cap->end = end;
>> +
>> +	return 0;
>> +}
>> +
>> +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end,
>> +				struct list_head *head)
>> +{
>> +	struct vfio_iommu_iova *region;
>> +
>> +	region = kzalloc(sizeof(*region), GFP_KERNEL);
>> +	if (!region)
>> +		return -ENOMEM;
>> +
>> +	INIT_LIST_HEAD(&region->list);
>> +	region->start = start;
>> +	region->end = end;
>> +
>> +	list_add_tail(&region->list, head);
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Check and update iova region list in case a reserved region
>> + * overlaps the iommu iova range.
>> + */
>> +static int vfio_update_iommu_iova_range(phys_addr_t start, phys_addr_t end,
>> +					struct list_head *iova)
>> +{
>> +	struct vfio_iommu_iova *node;
>> +	phys_addr_t a, b;
>> +	int ret = 0;
>> +
>> +	if (list_empty(iova))
>> +		return -ENODEV;
>> +
>> +	node = list_last_entry(iova, struct vfio_iommu_iova, list);
>> +	a = node->start;
>> +	b = node->end;
>> +
>> +	/* No overlap */
>> +	if ((start > b) || (end < a))
>> +		return 0;
>> +
>> +	if (start > a)
>> +		ret = vfio_insert_iova(a, start - 1, &node->list);
>> +	if (ret)
>> +		goto done;
>> +	if (end < b)
>> +		ret = vfio_insert_iova(end + 1, b, &node->list);
>> +
>> +done:
>> +	list_del(&node->list);
>> +	kfree(node);
>> +
>> +	return ret;
>> +}
>> +
>> +static int vfio_resv_cmp(void *priv, struct list_head *a, struct list_head *b)
>> +{
>> +	struct iommu_resv_region *ra, *rb;
>> +
>> +	ra = container_of(a, struct iommu_resv_region, list);
>> +	rb = container_of(b, struct iommu_resv_region, list);
>> +
>> +	if (ra->start < rb->start)
>> +		return -1;
>> +	if (ra->start > rb->start)
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu,
>> +				struct vfio_info_cap *caps)
>> +{
>> +	struct iommu_resv_region *resv, *resv_next;
>> +	struct vfio_iommu_iova *iova, *iova_next;
>> +	struct list_head group_resv_regions, vfio_iova_regions;
>> +	struct vfio_domain *domain;
>> +	struct vfio_group *g;
>> +	phys_addr_t start, end;
>> +	int ret = 0;
>> +
>> +	domain = list_first_entry(&iommu->domain_list,
>> +				  struct vfio_domain, next);
>> +	/* Get the default iova range supported */
>> +	start = domain->domain->geometry.aperture_start;
>> +	end = domain->domain->geometry.aperture_end;
>> +	INIT_LIST_HEAD(&vfio_iova_regions);
>> +	vfio_insert_iova(start, end, &vfio_iova_regions);
>> +
>> +	/* Get reserved regions if any */
>> +	INIT_LIST_HEAD(&group_resv_regions);
>> +	list_for_each_entry(g, &domain->group_list, next)
>> +		iommu_get_group_resv_regions(g->iommu_group,
>> +						&group_resv_regions);
>> +	list_sort(NULL, &group_resv_regions, vfio_resv_cmp);
>> +
>> +	/* Update iova range excluding reserved regions */
>> +	list_for_each_entry(resv, &group_resv_regions, list) {
>> +		ret = vfio_update_iommu_iova_range(resv->start,
>> +				resv->start + resv->length - 1,
>> +				&vfio_iova_regions);
>> +		if (ret)
>> +			goto done;
>> +	}
>> +
>> +	list_for_each_entry(iova, &vfio_iova_regions, list) {
>> +		ret = vfio_add_iova_cap(caps, iova->start, iova->end);
>> +		if (ret)
>> +			goto done;
>> +	}
>> +
>> +done:
>> +	list_for_each_entry_safe(resv, resv_next, &group_resv_regions, list)
>> +		kfree(resv);
>> +
>> +	list_for_each_entry_safe(iova, iova_next, &vfio_iova_regions, list)
>> +		kfree(iova);
>> +
>> +	return ret;
>> +}
>> +
>>   static long vfio_iommu_type1_ioctl(void *iommu_data,
>>   				   unsigned int cmd, unsigned long arg)
>>   {
>> @@ -1558,8 +1703,10 @@ static long vfio_iommu_type1_ioctl(void
>> *iommu_data,
>>   		}
>>   	} else if (cmd == VFIO_IOMMU_GET_INFO) {
>>   		struct vfio_iommu_type1_info info;
>> +		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
>> +		int ret;
>>
>> -		minsz = offsetofend(struct vfio_iommu_type1_info,
>> iova_pgsizes);
>> +		minsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
>>
>>   		if (copy_from_user(&info, (void __user *)arg, minsz))
>>   			return -EFAULT;
>> @@ -1571,6 +1718,29 @@ static long vfio_iommu_type1_ioctl(void
>> *iommu_data,
>>
>>   		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
>>
>> +		ret = vfio_build_iommu_iova_caps(iommu, &caps);
>> +		if (ret)
>> +			return ret;
>> +
>> +		if (caps.size) {
>> +			info.flags |= VFIO_IOMMU_INFO_CAPS;
>> +			if (info.argsz < sizeof(info) + caps.size) {
>> +				info.argsz = sizeof(info) + caps.size;
>> +				info.cap_offset = 0;
>> +			} else {
>> +				vfio_info_cap_shift(&caps, sizeof(info));
>> +				if (copy_to_user((void __user *)arg +
>> +						sizeof(info), caps.buf,
>> +						caps.size)) {
>> +					kfree(caps.buf);
>> +					return -EFAULT;
>> +				}
>> +				info.cap_offset = sizeof(info);
>> +			}
>> +
>> +			kfree(caps.buf);
>> +		}
>> +
>>   		return copy_to_user((void __user *)arg, &info, minsz) ?
>>   			-EFAULT : 0;
>>
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index e3301db..c4e338b 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -517,7 +517,20 @@ struct vfio_iommu_type1_info {
>>   	__u32	argsz;
>>   	__u32	flags;
>>   #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
>> +#define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
>>   	__u64	iova_pgsizes;		/* Bitmap of supported page sizes */
>> +	__u32   cap_offset;	/* Offset within info struct of first cap */
>> +	__u32	__resv;
>> +};
>> +
>> +/*
>> + * The IOVA_RANGE capability allows to report the IOVA range(s),
>> + */
>> +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1
>> +struct vfio_iommu_type1_info_cap_iova_range {
>> +	struct vfio_info_cap_header header;
>> +	__u64 start;
>> +	__u64 end;
>>   };
>>
>>   #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>> --
>> 1.9.1
>>
> 


-- 
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ