Message-ID: <aee9dceb-db9c-f610-1d00-9993b1b81b8a@linux.ibm.com>
Date: Mon, 12 Jul 2021 09:42:12 -0400
From: Tony Krowiak <akrowiak@...ux.ibm.com>
To: linux-s390@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: borntraeger@...ibm.com, cohuck@...hat.com,
pasic@...ux.vnet.ibm.com, jjherne@...ux.ibm.com, jgg@...dia.com,
alex.williamson@...hat.com, kwankhede@...dia.com,
frankja@...ux.ibm.com, david@...hat.com, imbrenda@...ux.ibm.com,
hca@...ux.ibm.com
Subject: Re: [PATCH] s390/vfio-ap: do not open code locks for
VFIO_GROUP_NOTIFY_SET_KVM notification

Ping!

On 7/7/21 11:41 AM, Tony Krowiak wrote:
> The fix to resolve a lockdep splat while handling the
> VFIO_GROUP_NOTIFY_SET_KVM event introduced a kvm_busy flag indicating that
> the vfio_ap device driver is busy setting or unsetting the KVM pointer.
> A wait queue was employed to allow functions requiring access to the KVM
> pointer to wait for the kvm_busy flag to be cleared. For the duration of
> the wait, the matrix_dev->lock mutex was released and then re-acquired once
> the kvm_busy flag had been cleared.
>
> It was pointed out during an unrelated patch review that locks should not
> be open coded, i.e., implementing the algorithm of a standard lock by hand
> instead of using a lock from the standard library. Setting and testing the
> kvm_busy flag and sleeping on a wait_event do exactly what a lock does.
> Whatever potential deadlock was found and reported via the lockdep splat was
> not magically removed by switching to a wait queue; the change merely removed
> the lockdep annotations that would have identified the issue early.
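>
> For illustration, the pattern being removed amounts to the following
> simplified sketch (not the exact driver code; see the removed lines in the
> diff below):
>
>         /* "acquire": mark the KVM pointer as in flux, then drop the mutex */
>         matrix_mdev->kvm_busy = true;
>         mutex_unlock(&matrix_dev->lock);
>         /* ... update the guest's AP configuration ... */
>         mutex_lock(&matrix_dev->lock);
>
>         /* "release": clear the flag and wake up any waiters */
>         matrix_mdev->kvm_busy = false;
>         wake_up_all(&matrix_mdev->wait_for_kvm);
>
>         /* "acquire" on the waiting side: sleep until the flag is cleared,
>          * dropping and re-taking matrix_dev->lock around the sleep
>          */
>         wait_event_cmd(matrix_mdev->wait_for_kvm,
>                        !matrix_mdev->kvm_busy,
>                        mutex_unlock(&matrix_dev->lock),
>                        mutex_lock(&matrix_dev->lock));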
>
> To remedy the problem introduced with the open coded locks, this patch
> introduces the following changes:
>
> 1. Removes the kvm_busy flag and wait queue. These were introduced to
> prevent functions from accessing the KVM pointer while it was being
> set because the matrix_dev->lock mutex had to be given up while
> updating the guest's AP configuration in order to resolve the lockdep
> splat. Since the functions that set the KVM pointer as well as those
> that need access to it do so while holding the matrix_dev->lock mutex,
> it is not necessary to wait for the KVM pointer to be set.
>
> 2. Introduces an rwsem to protect the hook (i.e., function pointer) to the
> handler that processes interception of the PQAP instruction. A read
> lock will be taken when the PQAP instruction is intercepted, before
> calling the handler. A write lock will be taken whenever the KVM
> pointer is set, since the functions that set the KVM pointer also set
> the hook (see the simplified locking sketch after this list).
>
> 3. Removes the lock of the matrix_dev->lock mutex from the function that
> handles interception of the PQAP instruction. Since the functions that
> set the KVM pointer and the PQAP interception handler hook as well as
> the function that calls the hook lock the rwsem, it is not necessary
> to lock the matrix_dev->lock mutex in the handler.
>
> Fixes: 0cc00c8d4050 ("s390/vfio-ap: fix circular lockdep when setting/clearing crypto masks")
> Cc: stable@...r.kernel.org
> Signed-off-by: Tony Krowiak <akrowiak@...ux.ibm.com>
> Reported-by: Jason Gunthorpe <jgg@...dia.com>
> ---
> arch/s390/include/asm/kvm_host.h | 8 +-
> arch/s390/kvm/kvm-s390.c | 1 +
> arch/s390/kvm/priv.c | 10 +-
> drivers/s390/crypto/vfio_ap_ops.c | 129 +++++++++++---------------
> drivers/s390/crypto/vfio_ap_private.h | 4 +-
> 5 files changed, 67 insertions(+), 85 deletions(-)
>
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 9b4473f76e56..f18849d259e6 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -798,14 +798,12 @@ struct kvm_s390_cpu_model {
> unsigned short ibc;
> };
>
> -struct kvm_s390_module_hook {
> - int (*hook)(struct kvm_vcpu *vcpu);
> - struct module *owner;
> -};
> +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
>
> struct kvm_s390_crypto {
> struct kvm_s390_crypto_cb *crycb;
> - struct kvm_s390_module_hook *pqap_hook;
> + struct rw_semaphore pqap_hook_rwsem;
> + crypto_hook *pqap_hook;
> __u32 crycbd;
> __u8 aes_kw;
> __u8 dea_kw;
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index b655a7d82bf0..339534a0c5a5 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2641,6 +2641,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
> sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
> get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
> sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
> + init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
> }
>
> static void sca_dispose(struct kvm *kvm)
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 9928f785c677..ec16c2facf7c 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
> static int handle_pqap(struct kvm_vcpu *vcpu)
> {
> struct ap_queue_status status = {};
> + crypto_hook handle_pqap;
> unsigned long reg0;
> int ret;
> uint8_t fc;
> @@ -657,15 +658,16 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
> * Verify that the hook callback is registered, lock the owner
> * and call the hook.
> */
> + down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
> if (vcpu->kvm->arch.crypto.pqap_hook) {
> - if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
> - return -EOPNOTSUPP;
> - ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
> - module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
> + handle_pqap = *vcpu->kvm->arch.crypto.pqap_hook;
> + ret = handle_pqap(vcpu);
> if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
> kvm_s390_set_psw_cc(vcpu, 3);
> + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
> return ret;
> }
> + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
> /*
> * A vfio_driver must register a hook.
> * No hook means no driver to enable the SIE CRYCB and no queues.
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 122c85c22469..d3447f6d83f1 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -270,6 +270,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
> * We take the matrix_dev lock to ensure serialization on queues and
> * mediated device access.
> *
> + * Note: This function must be called with a read lock held on
> + * vcpu->kvm->arch.crypto.pqap_hook_rwsem.
> + *
> * Return 0 if we could handle the request inside KVM.
> * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
> */
> @@ -287,22 +290,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
> return -EOPNOTSUPP;
>
> apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
> - mutex_lock(&matrix_dev->lock);
>
> if (!vcpu->kvm->arch.crypto.pqap_hook)
> goto out_unlock;
> matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
> struct ap_matrix_mdev, pqap_hook);
>
> - /*
> - * If the KVM pointer is in the process of being set, wait until the
> - * process has completed.
> - */
> - wait_event_cmd(matrix_mdev->wait_for_kvm,
> - !matrix_mdev->kvm_busy,
> - mutex_unlock(&matrix_dev->lock),
> - mutex_lock(&matrix_dev->lock));
> -
> /* If the there is no guest using the mdev, there is nothing to do */
> if (!matrix_mdev->kvm)
> goto out_unlock;
> @@ -323,7 +316,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
> out_unlock:
> memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
> vcpu->run->s.regs.gprs[1] >>= 32;
> - mutex_unlock(&matrix_dev->lock);
> return 0;
> }
>
> @@ -350,10 +342,8 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev)
>
> matrix_mdev->mdev = mdev;
> vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
> - init_waitqueue_head(&matrix_mdev->wait_for_kvm);
> mdev_set_drvdata(mdev, matrix_mdev);
> - matrix_mdev->pqap_hook.hook = handle_pqap;
> - matrix_mdev->pqap_hook.owner = THIS_MODULE;
> + matrix_mdev->pqap_hook = handle_pqap;
> mutex_lock(&matrix_dev->lock);
> list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
> mutex_unlock(&matrix_dev->lock);
> @@ -624,7 +614,7 @@ static ssize_t assign_adapter_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * un-assignment of adapter
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -697,7 +687,7 @@ static ssize_t unassign_adapter_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * un-assignment of adapter
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -787,7 +777,7 @@ static ssize_t assign_domain_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * assignment of domain
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -855,7 +845,7 @@ static ssize_t unassign_domain_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * un-assignment of domain
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -909,7 +899,7 @@ static ssize_t assign_control_domain_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * assignment of control domain.
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -968,7 +958,7 @@ static ssize_t unassign_control_domain_store(struct device *dev,
> * If the KVM pointer is in flux or the guest is running, disallow
> * un-assignment of control domain.
> */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> + if (matrix_mdev->kvm) {
> ret = -EBUSY;
> goto done;
> }
> @@ -1108,26 +1098,31 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> {
> struct ap_matrix_mdev *m;
>
> - if (kvm->arch.crypto.crycbd) {
> - list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> - if (m != matrix_mdev && m->kvm == kvm)
> - return -EPERM;
> - }
> + if (!kvm->arch.crypto.crycbd)
> + return 0;
>
> - kvm_get_kvm(kvm);
> - matrix_mdev->kvm_busy = true;
> - mutex_unlock(&matrix_dev->lock);
> - kvm_arch_crypto_set_masks(kvm,
> - matrix_mdev->matrix.apm,
> - matrix_mdev->matrix.aqm,
> - matrix_mdev->matrix.adm);
> - mutex_lock(&matrix_dev->lock);
> - kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
> - matrix_mdev->kvm = kvm;
> - matrix_mdev->kvm_busy = false;
> - wake_up_all(&matrix_mdev->wait_for_kvm);
> + down_write(&kvm->arch.crypto.pqap_hook_rwsem);
> + mutex_lock(&matrix_dev->lock);
> +
> + list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> + if (m != matrix_mdev && m->kvm == kvm) {
> + up_write(&kvm->arch.crypto.pqap_hook_rwsem);
> + mutex_unlock(&matrix_dev->lock);
> + return -EPERM;
> + }
> }
>
> + kvm_get_kvm(kvm);
> + matrix_mdev->kvm = kvm;
> + kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
> + mutex_unlock(&matrix_dev->lock);
> +
> + kvm_arch_crypto_set_masks(kvm,
> + matrix_mdev->matrix.apm,
> + matrix_mdev->matrix.aqm,
> + matrix_mdev->matrix.adm);
> + up_write(&kvm->arch.crypto.pqap_hook_rwsem);
> +
> return 0;
> }
>
> @@ -1164,6 +1159,7 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> * vfio_ap_mdev_unset_kvm
> *
> * @matrix_mdev: a matrix mediated device
> + * @kvm: the KVM guest state object
> *
> * Performs clean-up of resources no longer needed by @matrix_mdev.
> *
> @@ -1175,29 +1171,30 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> * done under the @matrix_mdev->lock.
> *
> */
> -static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
> +static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
> + struct kvm *kvm)
> {
> - /*
> - * If the KVM pointer is in the process of being set, wait until the
> - * process has completed.
> - */
> - wait_event_cmd(matrix_mdev->wait_for_kvm,
> - !matrix_mdev->kvm_busy,
> - mutex_unlock(&matrix_dev->lock),
> - mutex_lock(&matrix_dev->lock));
> + if (!kvm)
> + return;
>
> - if (matrix_mdev->kvm) {
> - matrix_mdev->kvm_busy = true;
> + down_write(&kvm->arch.crypto.pqap_hook_rwsem);
> + mutex_lock(&matrix_dev->lock);
> + if ((!matrix_mdev->kvm) || (!matrix_mdev->kvm->arch.crypto.crycbd)) {
> + up_write(&kvm->arch.crypto.pqap_hook_rwsem);
> mutex_unlock(&matrix_dev->lock);
> - kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> - mutex_lock(&matrix_dev->lock);
> - vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> - matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> - kvm_put_kvm(matrix_mdev->kvm);
> - matrix_mdev->kvm = NULL;
> - matrix_mdev->kvm_busy = false;
> - wake_up_all(&matrix_mdev->wait_for_kvm);
> + return;
> }
> + mutex_unlock(&matrix_dev->lock);
> +
> + kvm_arch_crypto_clear_masks(kvm);
> +
> + mutex_lock(&matrix_dev->lock);
> + vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> + kvm_put_kvm(kvm);
> + kvm->arch.crypto.pqap_hook = NULL;
> + matrix_mdev->kvm = NULL;
> + mutex_unlock(&matrix_dev->lock);
> + up_write(&kvm->arch.crypto.pqap_hook_rwsem);
> }
>
> static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> @@ -1209,16 +1206,13 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> if (action != VFIO_GROUP_NOTIFY_SET_KVM)
> return NOTIFY_OK;
>
> - mutex_lock(&matrix_dev->lock);
> matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>
> if (!data)
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> + vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
> else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
> notify_rc = NOTIFY_DONE;
>
> - mutex_unlock(&matrix_dev->lock);
> -
> return notify_rc;
> }
>
> @@ -1352,14 +1346,12 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
> {
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - mutex_lock(&matrix_dev->lock);
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - mutex_unlock(&matrix_dev->lock);
> -
> vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> &matrix_mdev->iommu_notifier);
> vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
> &matrix_mdev->group_notifier);
> +
> + vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
> module_put(THIS_MODULE);
> }
>
> @@ -1401,15 +1393,6 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
> break;
> }
>
> - /*
> - * If the KVM pointer is in the process of being set, wait until
> - * the process has completed.
> - */
> - wait_event_cmd(matrix_mdev->wait_for_kvm,
> - !matrix_mdev->kvm_busy,
> - mutex_unlock(&matrix_dev->lock),
> - mutex_lock(&matrix_dev->lock));
> -
> ret = vfio_ap_mdev_reset_queues(mdev);
> break;
> default:
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index f82a6396acae..22d2e0ca3ae5 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -83,10 +83,8 @@ struct ap_matrix_mdev {
> struct ap_matrix matrix;
> struct notifier_block group_notifier;
> struct notifier_block iommu_notifier;
> - bool kvm_busy;
> - wait_queue_head_t wait_for_kvm;
> struct kvm *kvm;
> - struct kvm_s390_module_hook pqap_hook;
> + crypto_hook pqap_hook;
> struct mdev_device *mdev;
> };
>