[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <53a8dd61-0f50-da2a-6594-2a5920af3024@linux.ibm.com>
Date: Mon, 9 Jan 2023 16:07:14 -0500
From: Anthony Krowiak <akrowiak@...ux.ibm.com>
To: Matthew Rosato <mjrosato@...ux.ibm.com>,
alex.williamson@...hat.com, pbonzini@...hat.com
Cc: jgg@...dia.com, cohuck@...hat.com, farman@...ux.ibm.com,
pmorel@...ux.ibm.com, borntraeger@...ux.ibm.com,
frankja@...ux.ibm.com, imbrenda@...ux.ibm.com, david@...hat.com,
jjherne@...ux.ibm.com, pasic@...ux.ibm.com,
zhenyuw@...ux.intel.com, zhi.a.wang@...el.com,
linux-s390@...r.kernel.org, kvm@...r.kernel.org,
intel-gvt-dev@...ts.freedesktop.org,
intel-gfx@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] KVM: async kvm_destroy_vm for vfio devices
LGTM
Reviewed-by: Tony Krowiak <akrowiak@...ux.ibm.com>
On 1/9/23 3:10 PM, Matthew Rosato wrote:
> Currently it is possible that the final put of a KVM reference comes from
> vfio during its device close operation. This occurs while the vfio group
> lock is held, so if the vfio device is still in the kvm device list,
> the following call chain could result in a deadlock:
>
> kvm_put_kvm
> -> kvm_destroy_vm
> -> kvm_destroy_devices
> -> kvm_vfio_destroy
> -> kvm_vfio_file_set_kvm
> -> vfio_file_set_kvm
> -> group->group_lock/group_rwsem
>
> Avoid this scenario by adding kvm_put_kvm_async, which performs the
> kvm_destroy_vm asynchronously if the refcount reaches 0.
>
> Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM")
> Reported-by: Alex Williamson <alex.williamson@...hat.com>
> Signed-off-by: Matthew Rosato <mjrosato@...ux.ibm.com>
> ---
> drivers/gpu/drm/i915/gvt/kvmgt.c | 6 +++++-
> drivers/s390/crypto/vfio_ap_ops.c | 7 ++++++-
> include/linux/kvm_host.h | 3 +++
> virt/kvm/kvm_main.c | 22 ++++++++++++++++++++++
> 4 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 8ae7039b3683..24511c877572 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -703,7 +703,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>
> kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
> &vgpu->track_node);
> - kvm_put_kvm(vgpu->vfio_device.kvm);
> + /*
> + * Avoid possible deadlock on any currently-held vfio lock by
> + * ensuring the potential kvm_destroy_vm call is done asynchronously
> + */
> + kvm_put_kvm_async(vgpu->vfio_device.kvm);
>
> kvmgt_protect_table_destroy(vgpu);
> gvt_cache_destroy(vgpu);
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e93bb9c468ce..a37b2baefb36 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1574,7 +1574,12 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>
> kvm_arch_crypto_clear_masks(kvm);
> vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
> - kvm_put_kvm(kvm);
> + /*
> + * Avoid possible deadlock on any currently-held vfio lock by
> + * ensuring the potential kvm_destroy_vm call is done
> + * asynchronously
> + */
> + kvm_put_kvm_async(kvm);
> matrix_mdev->kvm = NULL;
>
> release_update_locks_for_kvm(kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 4f26b244f6d0..2ef6a5102265 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -34,6 +34,7 @@
> #include <linux/instrumentation.h>
> #include <linux/interval_tree.h>
> #include <linux/rbtree.h>
> +#include <linux/workqueue.h>
> #include <linux/xarray.h>
> #include <asm/signal.h>
>
> @@ -793,6 +794,7 @@ struct kvm {
> struct kvm_stat_data **debugfs_stat_data;
> struct srcu_struct srcu;
> struct srcu_struct irq_srcu;
> + struct work_struct async_work;
> pid_t userspace_pid;
> bool override_halt_poll_ns;
> unsigned int max_halt_poll_ns;
> @@ -963,6 +965,7 @@ void kvm_exit(void);
> void kvm_get_kvm(struct kvm *kvm);
> bool kvm_get_kvm_safe(struct kvm *kvm);
> void kvm_put_kvm(struct kvm *kvm);
> +void kvm_put_kvm_async(struct kvm *kvm);
> bool file_is_kvm(struct file *file);
> void kvm_put_kvm_no_destroy(struct kvm *kvm);
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 13e88297f999..fbe8d127028b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1353,6 +1353,28 @@ void kvm_put_kvm(struct kvm *kvm)
> }
> EXPORT_SYMBOL_GPL(kvm_put_kvm);
>
> +static void kvm_put_async_fn(struct work_struct *work)
> +{
> + struct kvm *kvm = container_of(work, struct kvm,
> + async_work);
> +
> + kvm_destroy_vm(kvm);
> +}
> +
> +/*
> + * Put a reference but only destroy the vm asynchronously. Can be used in
> + * cases where the caller holds a mutex that could cause deadlock if
> + * kvm_destroy_vm is triggered.
> + */
> +void kvm_put_kvm_async(struct kvm *kvm)
> +{
> + if (refcount_dec_and_test(&kvm->users_count)) {
> + INIT_WORK(&kvm->async_work, kvm_put_async_fn);
> + schedule_work(&kvm->async_work);
> + }
> +}
> +EXPORT_SYMBOL_GPL(kvm_put_kvm_async);
> +
> /*
> * Used to put a reference that was taken on behalf of an object associated
> * with a user-visible file descriptor, e.g. a vcpu or device, if installation
Powered by blists - more mailing lists