[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250530095112.5ed4c249@DESKTOP-0403QTC.>
Date: Fri, 30 May 2025 09:51:12 -0700
From: Jacob Pan <jacob.pan@...ux.microsoft.com>
To: Jason Gunthorpe <jgg@...dia.com>
Cc: linux-kernel@...r.kernel.org, "iommu@...ts.linux.dev"
<iommu@...ts.linux.dev>, Alex Williamson <alex.williamson@...hat.com>,
"Liu, Yi L" <yi.l.liu@...el.com>, Zhang Yu <zhangyu1@...rosoft.com>, Easwar
Hariharan <eahariha@...ux.microsoft.com>, jacob.pan@...ux.microsoft.com
Subject: Re: [PATCH 1/2] vfio: Fix unbalanced vfio_df_close call in no-iommu
mode
Hi Jason,
On Mon, 26 May 2025 21:05:11 -0300
Jason Gunthorpe <jgg@...dia.com> wrote:
> On Fri, May 16, 2025 at 09:45:21AM -0700, Jacob Pan wrote:
> > For no-iommu enabled devices working under IOMMUFD VFIO compat
> > mode, the group open path does not call vfio_df_open() and the
> > open_count is 0. So calling vfio_df_close() in the group close path
> > will trigger a warning in vfio_assert_device_open(device).
> >
> > E.g., the following warning can be seen by running the VFIO test:
> > https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c
> > CONFIG_VFIO_CONTAINER = n
> > [ 29.094781] vfio-pci 0000:02:01.0: vfio-noiommu device opened by user (vfio-noiommu-pc:164)
> > Failed to get device info
> > [ 29.096540] ------------[ cut here ]------------
> > [ 29.096616] WARNING: CPU: 1 PID: 164 at
> > drivers/vfio/vfio_main.c:487 vfio_df_close+0xac/0xb4
> >
> > This patch adds checks for no-iommu mode and open_count to skip
> > calling vfio_df_close.
> >
> > Signed-off-by: Jacob Pan <jacob.pan@...ux.microsoft.com>
> > ---
> > drivers/vfio/group.c | 7 ++++---
> > 1 file changed, 4 insertions(+), 3 deletions(-)
>
> Sorry, this should have a fixes line:
>
> I think it is probably
>
> Fixes: 6086efe73498 ("vfio-iommufd: Move noiommu compat validation
> out of vfio_iommufd_bind()")
>
> By the look of it, since that is what started skipping the
> vfio_df_open()
>
> But after looking at that patch I'm now doubting that this is the
> right fix.
>
> Previously we'd still do vfio_df_device_first_open(), just the
> vfio_df_iommufd_bind() was skipped.
>
> Now we skip all of vfio_df_device_first_open() which also means we
> skip:
>
> if (!try_module_get(device->dev->driver->owner))
> return -ENODEV;
>
> and
> if (device->ops->open_device) {
> ret = device->ops->open_device(device);
>
> Which seems wrong to me?? We only want to skip the bind, we should
> still do open_device! At least that is how it was before 6086e
>
> So.. This may not be the right fix.
>
> Maybe more like:
Looks good to me, please disregard my patch.
Tested-by: Jacob Pan <jacob.pan@...ux.microsoft.com>
I guess you will submit this?
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index c321d442f0da09..1b6a0e30544401 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -192,18 +192,18 @@ static int vfio_df_group_open(struct
> vfio_device_file *df)
> * implies they expected translation to exist
> */
> if (!capable(CAP_SYS_RAWIO) ||
> - vfio_iommufd_device_has_compat_ioas(device, df->iommufd))
> + vfio_iommufd_device_has_compat_ioas(device, df->iommufd)) {
> ret = -EPERM;
> - else
> - ret = 0;
> - goto out_put_kvm;
> + goto out_put_kvm;
> + }
> }
>
> ret = vfio_df_open(df);
> if (ret)
> goto out_put_kvm;
>
> - if (df->iommufd && device->open_count == 1) {
> + if (df->iommufd && device->open_count == 1 &&
> + !vfio_device_is_noiommu(device)) {
> ret = vfio_iommufd_compat_attach_ioas(device, df->iommufd);
> if (ret)
> goto out_close_device;
> diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> index c8c3a2d53f86e1..26c9c3068c77da 100644
> --- a/drivers/vfio/iommufd.c
> +++ b/drivers/vfio/iommufd.c
> @@ -54,9 +54,6 @@ void vfio_df_iommufd_unbind(struct vfio_device_file
> *df)
> lockdep_assert_held(&vdev->dev_set->lock);
>
> - if (vfio_device_is_noiommu(vdev))
> - return;
> -
> if (vdev->ops->unbind_iommufd)
> vdev->ops->unbind_iommufd(vdev);
> }
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 1fd261efc582d0..ff19ea05442e7d 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -506,17 +506,19 @@ static int vfio_df_device_first_open(struct
> vfio_device_file *df) {
> struct vfio_device *device = df->device;
> struct iommufd_ctx *iommufd = df->iommufd;
> - int ret;
> + int ret = 0;
>
> lockdep_assert_held(&device->dev_set->lock);
>
> if (!try_module_get(device->dev->driver->owner))
> return -ENODEV;
>
> - if (iommufd)
> - ret = vfio_df_iommufd_bind(df);
> - else
> + if (iommufd) {
> + if (!vfio_device_is_noiommu(device))
> + ret = vfio_df_iommufd_bind(df);
> + } else {
> ret = vfio_device_group_use_iommu(device);
> + }
> if (ret)
> goto err_module_put;
>
> @@ -528,10 +530,12 @@ static int vfio_df_device_first_open(struct vfio_device_file *df)
> return 0;
>
> err_unuse_iommu:
> - if (iommufd)
> - vfio_df_iommufd_unbind(df);
> - else
> + if (iommufd) {
> + if (!vfio_device_is_noiommu(device))
> + vfio_df_iommufd_unbind(df);
> + } else {
> vfio_device_group_unuse_iommu(device);
> + }
> err_module_put:
> module_put(device->dev->driver->owner);
> return ret;
> @@ -546,10 +550,12 @@ static void vfio_df_device_last_close(struct
> vfio_device_file *df)
> if (device->ops->close_device)
> device->ops->close_device(device);
> - if (iommufd)
> - vfio_df_iommufd_unbind(df);
> - else
> + if (iommufd) {
> + if (!vfio_device_is_noiommu(device))
> + vfio_df_iommufd_unbind(df);
> + } else {
> vfio_device_group_unuse_iommu(device);
> + }
> module_put(device->dev->driver->owner);
> }
>
>
> Jason
Powered by blists - more mailing lists