[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250702124042.GD1051729@nvidia.com>
Date: Wed, 2 Jul 2025 09:40:42 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: "Tian, Kevin" <kevin.tian@...el.com>
Cc: Xu Yilun <yilun.xu@...ux.intel.com>,
"will@...nel.org" <will@...nel.org>,
"aneesh.kumar@...nel.org" <aneesh.kumar@...nel.org>,
"iommu@...ts.linux.dev" <iommu@...ts.linux.dev>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"joro@...tes.org" <joro@...tes.org>,
"robin.murphy@....com" <robin.murphy@....com>,
"shuah@...nel.org" <shuah@...nel.org>,
"nicolinc@...dia.com" <nicolinc@...dia.com>,
"aik@....com" <aik@....com>,
"Williams, Dan J" <dan.j.williams@...el.com>,
"baolu.lu@...ux.intel.com" <baolu.lu@...ux.intel.com>,
"Xu, Yilun" <yilun.xu@...el.com>
Subject: Re: [PATCH v3 2/5] iommufd: Destroy vdevice on idevice destroy
On Wed, Jul 02, 2025 at 09:13:50AM +0000, Tian, Kevin wrote:
> > > Yes, you can't touch idev inside the destroy function at all, under
> > > any version. idev is only valid if you have a refcount on vdev.
> > >
> > > But why are you touching this lock? Arrange things so abort doesn't
> > > touch the idev??
> >
> > idev has a pointer idev->vdev to track the vdev's lifecycle.
> > idev->igroup->lock protects the pointer. At the end of
> > iommufd_vdevice_destroy() this pointer should be NULLed so that idev
> > knows vdev is really destroyed.
Well, that is destroy, not abort, but OK, there is an issue with
destroy.
> but comparing to that I'd prefer to the original wait approach...
Okay, but lets try to keep the wait hidden inside the refcounting..
The issue here is we don't hold a refcount on idev while working with
idev. Let's fix that and then things should work properly?
Maybe something like this:
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 4e781aa9fc6329..9174fa7c972b80 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -178,12 +178,20 @@ static void iommufd_device_remove_vdev(struct iommufd_device *idev)
mutex_unlock(&idev->igroup->lock);
}
+void iommufd_device_pre_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_device *idev =
+ container_of(obj, struct iommufd_device, obj);
+
+ /* Release the short term users on this */
+ iommufd_device_remove_vdev(idev);
+}
+
void iommufd_device_destroy(struct iommufd_object *obj)
{
struct iommufd_device *idev =
container_of(obj, struct iommufd_device, obj);
- iommufd_device_remove_vdev(idev);
iommu_device_release_dma_owner(idev->dev);
iommufd_put_group(idev->igroup);
if (!iommufd_selftest_is_mock_dev(idev->dev))
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b2e8e106c16158..387c630fdabfbd 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -151,6 +151,9 @@ static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
if (refcount_dec_and_test(&to_destroy->shortterm_users))
return 0;
+ if (iommufd_object_ops[to_destroy->type].pre_destroy)
+ iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy);
+
if (wait_event_timeout(ictx->destroy_wait,
refcount_read(&to_destroy->shortterm_users) == 0,
msecs_to_jiffies(60000)))
@@ -567,6 +570,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_access_destroy_object,
},
[IOMMUFD_OBJ_DEVICE] = {
+ .pre_destroy = iommufd_device_pre_destroy,
.destroy = iommufd_device_destroy,
},
[IOMMUFD_OBJ_FAULT] = {
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 9451a311745f7b..cbf99daa7dc25d 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -135,6 +135,7 @@ void iommufd_vdevice_destroy(struct iommufd_object *obj)
mutex_lock(&vdev->idev->igroup->lock);
iommufd_vdevice_abort(obj);
mutex_unlock(&vdev->idev->igroup->lock);
+ iommufd_put_object(vdev->viommu->ictx, &vdev->idev->obj);
}
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
@@ -180,13 +181,19 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
vdev->id = virt_id;
vdev->viommu = viommu;
refcount_inc(&viommu->obj.users);
+
+ /*
+ * A reference is held on the idev so long as we have a pointer.
+ * iommufd_device_pre_destroy() will revoke it before the real
+ * destruction.
+ */
+ vdev->idev = idev;
+
/*
* iommufd_device_destroy() waits until idev->vdev is NULL before
* freeing the idev, which only happens once the vdev is finished
- * destruction. Thus we do not need refcounting on either idev->vdev or
- * vdev->idev.
+ * destruction.
*/
- vdev->idev = idev;
idev->vdev = vdev;
curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
@@ -207,7 +214,8 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
out_unlock_igroup:
mutex_unlock(&idev->igroup->lock);
out_put_idev:
- iommufd_put_object(ucmd->ictx, &idev->obj);
+ if (rc)
+ iommufd_put_object(ucmd->ictx, &idev->obj);
out_put_viommu:
iommufd_put_object(ucmd->ictx, &viommu->obj);
return rc;
Powered by blists - more mailing lists