[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <35701c5e-030a-4f52-b6f6-ed18368fb2cd@amd.com>
Date: Fri, 4 Oct 2024 14:32:28 +1000
From: Alexey Kardashevskiy <aik@....com>
To: Nicolin Chen <nicolinc@...dia.com>, jgg@...dia.com, kevin.tian@...el.com,
will@...nel.org
Cc: joro@...tes.org, suravee.suthikulpanit@....com, robin.murphy@....com,
dwmw2@...radead.org, baolu.lu@...ux.intel.com, shuah@...nel.org,
linux-kernel@...r.kernel.org, iommu@...ts.linux.dev,
linux-arm-kernel@...ts.infradead.org, linux-kselftest@...r.kernel.org,
eric.auger@...hat.com, jean-philippe@...aro.org, mdf@...nel.org,
mshavit@...gle.com, shameerali.kolothum.thodi@...wei.com,
smostafa@...gle.com, yi.l.liu@...el.com
Subject: Re: [PATCH v2 06/19] iommufd/viommu: Add
IOMMU_VIOMMU_SET/UNSET_VDEV_ID ioctl
On 28/8/24 02:59, Nicolin Chen wrote:
> Introduce a pair of new ioctls to set/unset a per-viommu virtual device id
> that should be linked to a physical device id via an idev pointer.
>
> Continue the support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu.
> Provide a lookup function for drivers to load device pointer by a virtual
> device id.
>
> Add a rw_semaphore protection around the vdev_id list. Any future ioctl
> handlers that potentially access the list must grab the lock too.
>
> Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
> ---
> drivers/iommu/iommufd/device.c | 12 +++
> drivers/iommu/iommufd/iommufd_private.h | 21 ++++
> drivers/iommu/iommufd/main.c | 6 ++
> drivers/iommu/iommufd/viommu.c | 121 ++++++++++++++++++++++++
> include/uapi/linux/iommufd.h | 40 ++++++++
> 5 files changed, 200 insertions(+)
>
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 5fd3dd420290..3ad759971b32 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj)
> struct iommufd_device *idev =
> container_of(obj, struct iommufd_device, obj);
>
> + /* Unlocked since there should be no race in a destroy() */
> + if (idev->vdev_id) {
> + struct iommufd_vdev_id *vdev_id = idev->vdev_id;
> + struct iommufd_viommu *viommu = vdev_id->viommu;
> + struct iommufd_vdev_id *old;
> +
> + old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
> + GFP_KERNEL);
> + WARN_ON(old != vdev_id);
> + kfree(vdev_id);
> + idev->vdev_id = NULL;
> + }
> iommu_device_release_dma_owner(idev->dev);
> iommufd_put_group(idev->igroup);
> if (!iommufd_selftest_is_mock_dev(idev->dev))
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 1f2a1c133b9a..2c6e168c5300 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -416,6 +416,7 @@ struct iommufd_device {
> struct iommufd_object obj;
> struct iommufd_ctx *ictx;
> struct iommufd_group *igroup;
> + struct iommufd_vdev_id *vdev_id;
> struct list_head group_item;
> /* always the physical device */
> struct device *dev;
> @@ -533,11 +534,31 @@ struct iommufd_viommu {
> struct iommufd_ctx *ictx;
> struct iommufd_hwpt_paging *hwpt;
>
> + /* The locking order is vdev_ids_rwsem -> igroup::lock */
> + struct rw_semaphore vdev_ids_rwsem;
> + struct xarray vdev_ids;
> +
> unsigned int type;
> };
>
> +struct iommufd_vdev_id {
> + struct iommufd_viommu *viommu;
> + struct iommufd_device *idev;
> + u64 id;
> +};
> +
> +static inline struct iommufd_viommu *
> +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
> +{
> + return container_of(iommufd_get_object(ucmd->ictx, id,
> + IOMMUFD_OBJ_VIOMMU),
> + struct iommufd_viommu, obj);
> +}
> +
> int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
> void iommufd_viommu_destroy(struct iommufd_object *obj);
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd);
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd);
>
> #ifdef CONFIG_IOMMUFD_TEST
> int iommufd_test(struct iommufd_ucmd *ucmd);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 288ee51b6829..199ad90fa36b 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -334,6 +334,8 @@ union ucmd_buffer {
> struct iommu_option option;
> struct iommu_vfio_ioas vfio_ioas;
> struct iommu_viommu_alloc viommu;
> + struct iommu_viommu_set_vdev_id set_vdev_id;
> + struct iommu_viommu_unset_vdev_id unset_vdev_id;
> #ifdef CONFIG_IOMMUFD_TEST
> struct iommu_test_cmd test;
> #endif
> @@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
> __reserved),
> IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
> struct iommu_viommu_alloc, out_viommu_id),
> + IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
> + struct iommu_viommu_set_vdev_id, vdev_id),
> + IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
> + struct iommu_viommu_unset_vdev_id, vdev_id),
> #ifdef CONFIG_IOMMUFD_TEST
> IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
> #endif
> diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
> index 200653a4bf57..8ffcd72b16b8 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
> {
> struct iommufd_viommu *viommu =
> container_of(obj, struct iommufd_viommu, obj);
> + struct iommufd_vdev_id *vdev_id;
> + unsigned long index;
> +
> + xa_for_each(&viommu->vdev_ids, index, vdev_id) {
> + /* Unlocked since there should be no race in a destroy() */
> + vdev_id->idev->vdev_id = NULL;
> + kfree(vdev_id);
> + }
> + xa_destroy(&viommu->vdev_ids);
>
> refcount_dec(&viommu->hwpt->common.obj.users);
> }
> @@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> viommu->ictx = ucmd->ictx;
> viommu->hwpt = hwpt_paging;
>
> + xa_init(&viommu->vdev_ids);
> + init_rwsem(&viommu->vdev_ids_rwsem);
> +
> refcount_inc(&viommu->hwpt->common.obj.users);
>
> cmd->out_viommu_id = viommu->obj.id;
> @@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> iommufd_put_object(ucmd->ictx, &idev->obj);
> return rc;
> }
> +
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> + struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
> + struct iommufd_vdev_id *vdev_id, *curr;
> + struct iommufd_viommu *viommu;
> + struct iommufd_device *idev;
> + int rc = 0;
> +
> + if (cmd->vdev_id > ULONG_MAX)
> + return -EINVAL;
> +
> + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> + if (IS_ERR(viommu))
> + return PTR_ERR(viommu);
> +
> + idev = iommufd_get_device(ucmd, cmd->dev_id);
> + if (IS_ERR(idev)) {
> + rc = PTR_ERR(idev);
> + goto out_put_viommu;
> + }
> +
> + down_write(&viommu->vdev_ids_rwsem);
> + mutex_lock(&idev->igroup->lock);
> + if (idev->vdev_id) {
> + rc = -EEXIST;
> + goto out_unlock_igroup;
> + }
> +
> + vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
> + if (!vdev_id) {
> + rc = -ENOMEM;
> + goto out_unlock_igroup;
> + }
> +
> + vdev_id->idev = idev;
> + vdev_id->viommu = viommu;
> + vdev_id->id = cmd->vdev_id;
> +
> + curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
> + GFP_KERNEL);
> + if (curr) {
> + rc = xa_err(curr) ? : -EBUSY;
> + goto out_free;
> + }
> +
> + idev->vdev_id = vdev_id;
> + goto out_unlock_igroup;
> +
> +out_free:
> + kfree(vdev_id);
> +out_unlock_igroup:
> + mutex_unlock(&idev->igroup->lock);
> + up_write(&viommu->vdev_ids_rwsem);
> + iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> + iommufd_put_object(ucmd->ictx, &viommu->obj);
> + return rc;
> +}
> +
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> + struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
> + struct iommufd_viommu *viommu;
> + struct iommufd_vdev_id *old;
> + struct iommufd_device *idev;
> + int rc = 0;
> +
> + if (cmd->vdev_id > ULONG_MAX)
> + return -EINVAL;
> +
> + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> + if (IS_ERR(viommu))
> + return PTR_ERR(viommu);
> +
> + idev = iommufd_get_device(ucmd, cmd->dev_id);
> + if (IS_ERR(idev)) {
> + rc = PTR_ERR(idev);
> + goto out_put_viommu;
> + }
> +
> + down_write(&viommu->vdev_ids_rwsem);
> + mutex_lock(&idev->igroup->lock);
> + if (!idev->vdev_id) {
> + rc = -ENOENT;
> + goto out_unlock_igroup;
> + }
> + if (idev->vdev_id->id != cmd->vdev_id) {
> + rc = -EINVAL;
> + goto out_unlock_igroup;
> + }
> +
> + old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
> + idev->vdev_id, NULL, GFP_KERNEL);
> + if (xa_is_err(old)) {
> + rc = xa_err(old);
> + goto out_unlock_igroup;
> + }
> + kfree(old);
> + idev->vdev_id = NULL;
> +
> +out_unlock_igroup:
> + mutex_unlock(&idev->igroup->lock);
> + up_write(&viommu->vdev_ids_rwsem);
> + iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> + iommufd_put_object(ucmd->ictx, &viommu->obj);
> + return rc;
> +}
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 51ce6a019c34..1816e89c922d 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -52,6 +52,8 @@ enum {
> IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
> IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
> IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
> + IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
> + IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91,
> };
>
> /**
> @@ -882,4 +884,42 @@ struct iommu_viommu_alloc {
> __u32 out_viommu_id;
> };
> #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
> +
> +/**
> + * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_set_vdev_id)
> + * @viommu_id: viommu ID to associate with the device to store its virtual ID
> + * @dev_id: device ID to set its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID
> + *
> + * Set a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_set_vdev_id {
> + __u32 size;
> + __u32 viommu_id;
> + __u32 dev_id;
Is this ID from vfio_device_bind_iommufd.out_devid?
> + __u32 __reserved;
> + __aligned_u64 vdev_id;
What is the nature of this id? It is not the guest's BDFn, is it? The
code suggests it is ARM's "SID" == "stream ID" and "a device might be
able to generate multiple StreamIDs" (how, why?) 🤯 And these streams
seem to have nothing to do with PCIe IDE streams, right?
For my SEV-TIO exercise ("trusted IO"), I am looking for a kernel
interface to pass the guest's BDFs for a specific host device (which is
passed through), and nothing in the kernel has any knowledge of it atm.
Is this the right place, or is another ioctl() needed here?
Sorry, I am too ignorant about ARM :)
> +};
> +#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
> +
> +/**
> + * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_unset_vdev_id)
> + * @viommu_id: viommu ID associated with the device to delete its virtual ID
> + * @dev_id: device ID to unset its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID (for verification)
> + *
> + * Unset a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_unset_vdev_id {
> + __u32 size;
> + __u32 viommu_id;
> + __u32 dev_id;
> + __u32 __reserved;
> + __aligned_u64 vdev_id;
> +};
> +#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID)
> #endif
Nit: "git format-patch -O orderfile" makes patches nicer by putting the
documentation first (.h before .c, in this case) with the "orderfile"
looking like this:
===
*.txt
configure
*Makefile*
*.json
*.h
*.c
===
Thanks,
--
Alexey
Powered by blists - more mailing lists