[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <35701c5e-030a-4f52-b6f6-ed18368fb2cd@amd.com>
Date: Fri, 4 Oct 2024 14:32:28 +1000
From: Alexey Kardashevskiy <aik@....com>
To: Nicolin Chen <nicolinc@...dia.com>, jgg@...dia.com, kevin.tian@...el.com,
 will@...nel.org
Cc: joro@...tes.org, suravee.suthikulpanit@....com, robin.murphy@....com,
 dwmw2@...radead.org, baolu.lu@...ux.intel.com, shuah@...nel.org,
 linux-kernel@...r.kernel.org, iommu@...ts.linux.dev,
 linux-arm-kernel@...ts.infradead.org, linux-kselftest@...r.kernel.org,
 eric.auger@...hat.com, jean-philippe@...aro.org, mdf@...nel.org,
 mshavit@...gle.com, shameerali.kolothum.thodi@...wei.com,
 smostafa@...gle.com, yi.l.liu@...el.com
Subject: Re: [PATCH v2 06/19] iommufd/viommu: Add
 IOMMU_VIOMMU_SET/UNSET_VDEV_ID ioctl
On 28/8/24 02:59, Nicolin Chen wrote:
> Introduce a pair of new ioctls to set/unset a per-viommu virtual device id
> that should be linked to a physical device id via an idev pointer.
> 
> Continue the support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu.
> Provide a lookup function for drivers to load device pointer by a virtual
> device id.
> 
> Add a rw_semaphore protection around the vdev_id list. Any future ioctl
> handlers that potentially access the list must grab the lock too.
> 
> Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
> ---
>   drivers/iommu/iommufd/device.c          |  12 +++
>   drivers/iommu/iommufd/iommufd_private.h |  21 ++++
>   drivers/iommu/iommufd/main.c            |   6 ++
>   drivers/iommu/iommufd/viommu.c          | 121 ++++++++++++++++++++++++
>   include/uapi/linux/iommufd.h            |  40 ++++++++
>   5 files changed, 200 insertions(+)
> 
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 5fd3dd420290..3ad759971b32 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj)
>   	struct iommufd_device *idev =
>   		container_of(obj, struct iommufd_device, obj);
>   
> +	/* Unlocked since there should be no race in a destroy() */
> +	if (idev->vdev_id) {
> +		struct iommufd_vdev_id *vdev_id = idev->vdev_id;
> +		struct iommufd_viommu *viommu = vdev_id->viommu;
> +		struct iommufd_vdev_id *old;
> +
> +		old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
> +				 GFP_KERNEL);
> +		WARN_ON(old != vdev_id);
> +		kfree(vdev_id);
> +		idev->vdev_id = NULL;
> +	}
>   	iommu_device_release_dma_owner(idev->dev);
>   	iommufd_put_group(idev->igroup);
>   	if (!iommufd_selftest_is_mock_dev(idev->dev))
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 1f2a1c133b9a..2c6e168c5300 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -416,6 +416,7 @@ struct iommufd_device {
>   	struct iommufd_object obj;
>   	struct iommufd_ctx *ictx;
>   	struct iommufd_group *igroup;
> +	struct iommufd_vdev_id *vdev_id;
>   	struct list_head group_item;
>   	/* always the physical device */
>   	struct device *dev;
> @@ -533,11 +534,31 @@ struct iommufd_viommu {
>   	struct iommufd_ctx *ictx;
>   	struct iommufd_hwpt_paging *hwpt;
>   
> +	/* The locking order is vdev_ids_rwsem -> igroup::lock */
> +	struct rw_semaphore vdev_ids_rwsem;
> +	struct xarray vdev_ids;
> +
>   	unsigned int type;
>   };
>   
> +struct iommufd_vdev_id {
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_device *idev;
> +	u64 id;
> +};
> +
> +static inline struct iommufd_viommu *
> +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
> +{
> +	return container_of(iommufd_get_object(ucmd->ictx, id,
> +					       IOMMUFD_OBJ_VIOMMU),
> +			    struct iommufd_viommu, obj);
> +}
> +
>   int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
>   void iommufd_viommu_destroy(struct iommufd_object *obj);
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd);
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd);
>   
>   #ifdef CONFIG_IOMMUFD_TEST
>   int iommufd_test(struct iommufd_ucmd *ucmd);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 288ee51b6829..199ad90fa36b 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -334,6 +334,8 @@ union ucmd_buffer {
>   	struct iommu_option option;
>   	struct iommu_vfio_ioas vfio_ioas;
>   	struct iommu_viommu_alloc viommu;
> +	struct iommu_viommu_set_vdev_id set_vdev_id;
> +	struct iommu_viommu_unset_vdev_id unset_vdev_id;
>   #ifdef CONFIG_IOMMUFD_TEST
>   	struct iommu_test_cmd test;
>   #endif
> @@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
>   		 __reserved),
>   	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
>   		 struct iommu_viommu_alloc, out_viommu_id),
> +	IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
> +		 struct iommu_viommu_set_vdev_id, vdev_id),
> +	IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
> +		 struct iommu_viommu_unset_vdev_id, vdev_id),
>   #ifdef CONFIG_IOMMUFD_TEST
>   	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
>   #endif
> diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
> index 200653a4bf57..8ffcd72b16b8 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
>   {
>   	struct iommufd_viommu *viommu =
>   		container_of(obj, struct iommufd_viommu, obj);
> +	struct iommufd_vdev_id *vdev_id;
> +	unsigned long index;
> +
> +	xa_for_each(&viommu->vdev_ids, index, vdev_id) {
> +		/* Unlocked since there should be no race in a destroy() */
> +		vdev_id->idev->vdev_id = NULL;
> +		kfree(vdev_id);
> +	}
> +	xa_destroy(&viommu->vdev_ids);
>   
>   	refcount_dec(&viommu->hwpt->common.obj.users);
>   }
> @@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
>   	viommu->ictx = ucmd->ictx;
>   	viommu->hwpt = hwpt_paging;
>   
> +	xa_init(&viommu->vdev_ids);
> +	init_rwsem(&viommu->vdev_ids_rwsem);
> +
>   	refcount_inc(&viommu->hwpt->common.obj.users);
>   
>   	cmd->out_viommu_id = viommu->obj.id;
> @@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
>   	iommufd_put_object(ucmd->ictx, &idev->obj);
>   	return rc;
>   }
> +
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
> +	struct iommufd_vdev_id *vdev_id, *curr;
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_device *idev;
> +	int rc = 0;
> +
> +	if (cmd->vdev_id > ULONG_MAX)
> +		return -EINVAL;
> +
> +	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> +	if (IS_ERR(viommu))
> +		return PTR_ERR(viommu);
> +
> +	idev = iommufd_get_device(ucmd, cmd->dev_id);
> +	if (IS_ERR(idev)) {
> +		rc = PTR_ERR(idev);
> +		goto out_put_viommu;
> +	}
> +
> +	down_write(&viommu->vdev_ids_rwsem);
> +	mutex_lock(&idev->igroup->lock);
> +	if (idev->vdev_id) {
> +		rc = -EEXIST;
> +		goto out_unlock_igroup;
> +	}
> +
> +	vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
> +	if (!vdev_id) {
> +		rc = -ENOMEM;
> +		goto out_unlock_igroup;
> +	}
> +
> +	vdev_id->idev = idev;
> +	vdev_id->viommu = viommu;
> +	vdev_id->id = cmd->vdev_id;
> +
> +	curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
> +			  GFP_KERNEL);
> +	if (curr) {
> +		rc = xa_err(curr) ? : -EBUSY;
> +		goto out_free;
> +	}
> +
> +	idev->vdev_id = vdev_id;
> +	goto out_unlock_igroup;
> +
> +out_free:
> +	kfree(vdev_id);
> +out_unlock_igroup:
> +	mutex_unlock(&idev->igroup->lock);
> +	up_write(&viommu->vdev_ids_rwsem);
> +	iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> +	iommufd_put_object(ucmd->ictx, &viommu->obj);
> +	return rc;
> +}
> +
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_vdev_id *old;
> +	struct iommufd_device *idev;
> +	int rc = 0;
> +
> +	if (cmd->vdev_id > ULONG_MAX)
> +		return -EINVAL;
> +
> +	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> +	if (IS_ERR(viommu))
> +		return PTR_ERR(viommu);
> +
> +	idev = iommufd_get_device(ucmd, cmd->dev_id);
> +	if (IS_ERR(idev)) {
> +		rc = PTR_ERR(idev);
> +		goto out_put_viommu;
> +	}
> +
> +	down_write(&viommu->vdev_ids_rwsem);
> +	mutex_lock(&idev->igroup->lock);
> +	if (!idev->vdev_id) {
> +		rc = -ENOENT;
> +		goto out_unlock_igroup;
> +	}
> +	if (idev->vdev_id->id != cmd->vdev_id) {
> +		rc = -EINVAL;
> +		goto out_unlock_igroup;
> +	}
> +
> +	old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
> +			 idev->vdev_id, NULL, GFP_KERNEL);
> +	if (xa_is_err(old)) {
> +		rc = xa_err(old);
> +		goto out_unlock_igroup;
> +	}
> +	kfree(old);
> +	idev->vdev_id = NULL;
> +
> +out_unlock_igroup:
> +	mutex_unlock(&idev->igroup->lock);
> +	up_write(&viommu->vdev_ids_rwsem);
> +	iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> +	iommufd_put_object(ucmd->ictx, &viommu->obj);
> +	return rc;
> +}
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 51ce6a019c34..1816e89c922d 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -52,6 +52,8 @@ enum {
>   	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
>   	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
>   	IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
> +	IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
> +	IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91,
>   };
>   
>   /**
> @@ -882,4 +884,42 @@ struct iommu_viommu_alloc {
>   	__u32 out_viommu_id;
>   };
>   #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
> +
> +/**
> + * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_set_vdev_id)
> + * @viommu_id: viommu ID to associate with the device to store its virtual ID
> + * @dev_id: device ID to set its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID
> + *
> + * Set a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_set_vdev_id {
> +	__u32 size;
> +	__u32 viommu_id;
> +	__u32 dev_id;
Is this ID from vfio_device_bind_iommufd.out_devid?
> +	__u32 __reserved;
> +	__aligned_u64 vdev_id;
What is the nature of this id? It is not the guest's BDFn, is it? The 
code suggests it is ARM's "SID" == "stream ID" and "a device might be 
able to generate multiple StreamIDs" (how, why?) 🤯 And these streams 
seem to have nothing to do with PCIe IDE streams, right?
For my SEV-TIO exercise ("trusted IO"), I am looking for a kernel 
interface to pass the guest's BDFs for a specific host device (which is 
passed through) and nothing in the kernel has any knowledge of it atm, 
is this the right place, or another ioctl() is needed here?
Sorry, I am too ignorant about ARM :)
> +};
> +#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
> +
> +/**
> + * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_unset_vdev_id)
> + * @viommu_id: viommu ID associated with the device to delete its virtual ID
> + * @dev_id: device ID to unset its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID (for verification)
> + *
> + * Unset a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_unset_vdev_id {
> +	__u32 size;
> +	__u32 viommu_id;
> +	__u32 dev_id;
> +	__u32 __reserved;
> +	__aligned_u64 vdev_id;
> +};
> +#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID)
>   #endif
Nit: "git format-patch -O orderfile" makes patches nicer by putting the 
documentation first (.h before .c, in this case) with the "ordefile" 
looking like this:
===
*.txt
configure
*Makefile*
*.json
*.h
*.c
===
Thanks,
-- 
Alexey
Powered by blists - more mailing lists
 
