lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 12 Jul 2021 09:42:12 -0400
From:   Tony Krowiak <akrowiak@...ux.ibm.com>
To:     linux-s390@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     borntraeger@...ibm.com, cohuck@...hat.com,
        pasic@...ux.vnet.ibm.com, jjherne@...ux.ibm.com, jgg@...dia.com,
        alex.williamson@...hat.com, kwankhede@...dia.com,
        frankja@...ux.ibm.com, david@...hat.com, imbrenda@...ux.ibm.com,
        hca@...ux.ibm.com
Subject: Re: [PATCH] s390/vfio-ap: do not open code locks for
 VFIO_GROUP_NOTIFY_SET_KVM notification

Ping!

On 7/7/21 11:41 AM, Tony Krowiak wrote:
> The fix to resolve a lockdep splat while handling the
> VFIO_GROUP_NOTIFY_SET_KVM event introduced a kvm_busy flag indicating that
> the vfio_ap device driver is busy setting or unsetting the KVM pointer.
> A wait queue was employed to allow functions requiring access to the KVM
> pointer to wait for the kvm_busy flag to be cleared. For the duration of
> the wait period, the mdev lock was unlocked then acquired again after the
> kvm_busy flag was cleared.
>
> It was pointed out during an unrelated patch review that locks should not
> be open coded - i.e., writing the algorithm of a standard lock in a
> function instead of using a lock from the standard library. The setting and
> testing of the kvm_busy flag and sleeping on a wait_event is the same thing
> a lock does. Whatever potential deadlock was found and reported via the
> lockdep splat was not magically removed by going to a wait_queue; it just
> removed the lockdep annotations that would identify the issue early.
>
> To remedy the problem introduced with the open coded locks, this patch
> introduces the following changes:
>
> 1. Removes the the kvm_busy flag and wait queue. These were introduced to
>     prevent functions from accessing the KVM pointer while it was being
>     set because the matrix_dev->lock mutex had to be given up while
>     updating the guest's AP configuration in order to resolve the lockdep
>     splat. Since the functions that set the KVM pointer as well as those
>     that need access to it do so while holding the matrix_dev->lock mutex,
>     it is not necessary to wait for the KVM pointer to be set.
>
> 2. Introduces an rwsem to protect the hook (i.e., function pointer) to the
>     handler that processes interception of the PQAP instruction. A read
>     lock will be taken when the PQAP instruction is intercepted, before
>     calling the handler. A write lock will be taken whenever the KVM
>     pointer is set since the functions that set the KVM pointer also set
>     the hook.
>
> 3. Removes the lock of the matrix_dev->lock mutex from the function that
>     handles interception of the PQAP instruction. Since the functions that
>     set the KVM pointer and the PQAP interception handler hook as well as
>     the function that calls the hook lock the rwsem, it is not necessary
>     to lock the matrix_dev->lock mutex in the handler.
>
> Fixes: 0cc00c8d4050 ("s390/vfio-ap: fix circular lockdep when setting/clearing crypto masks")
> Cc: stable@...r.kernel.org
> Signed-off-by: Tony Krowiak <akrowiak@...ux.ibm.com>
> Reported-by: Jason Gunthorpe <jgg@...dia.com>
> ---
>   arch/s390/include/asm/kvm_host.h      |   8 +-
>   arch/s390/kvm/kvm-s390.c              |   1 +
>   arch/s390/kvm/priv.c                  |  10 +-
>   drivers/s390/crypto/vfio_ap_ops.c     | 129 +++++++++++---------------
>   drivers/s390/crypto/vfio_ap_private.h |   4 +-
>   5 files changed, 67 insertions(+), 85 deletions(-)
>
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 9b4473f76e56..f18849d259e6 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -798,14 +798,12 @@ struct kvm_s390_cpu_model {
>   	unsigned short ibc;
>   };
>   
> -struct kvm_s390_module_hook {
> -	int (*hook)(struct kvm_vcpu *vcpu);
> -	struct module *owner;
> -};
> +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
>   
>   struct kvm_s390_crypto {
>   	struct kvm_s390_crypto_cb *crycb;
> -	struct kvm_s390_module_hook *pqap_hook;
> +	struct rw_semaphore pqap_hook_rwsem;
> +	crypto_hook *pqap_hook;
>   	__u32 crycbd;
>   	__u8 aes_kw;
>   	__u8 dea_kw;
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index b655a7d82bf0..339534a0c5a5 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2641,6 +2641,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
>   			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
>   	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
>   			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
> +	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
>   }
>   
>   static void sca_dispose(struct kvm *kvm)
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 9928f785c677..ec16c2facf7c 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
>   static int handle_pqap(struct kvm_vcpu *vcpu)
>   {
>   	struct ap_queue_status status = {};
> +	crypto_hook handle_pqap;
>   	unsigned long reg0;
>   	int ret;
>   	uint8_t fc;
> @@ -657,15 +658,16 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
>   	 * Verify that the hook callback is registered, lock the owner
>   	 * and call the hook.
>   	 */
> +	down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
>   	if (vcpu->kvm->arch.crypto.pqap_hook) {
> -		if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
> -			return -EOPNOTSUPP;
> -		ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
> -		module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
> +		handle_pqap = *vcpu->kvm->arch.crypto.pqap_hook;
> +		ret = handle_pqap(vcpu);
>   		if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
>   			kvm_s390_set_psw_cc(vcpu, 3);
> +		up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
>   		return ret;
>   	}
> +	up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
>   	/*
>   	 * A vfio_driver must register a hook.
>   	 * No hook means no driver to enable the SIE CRYCB and no queues.
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 122c85c22469..d3447f6d83f1 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -270,6 +270,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
>    * We take the matrix_dev lock to ensure serialization on queues and
>    * mediated device access.
>    *
> + * Note: This function must be called with a read lock held on
> + *	 vcpu->kvm->arch.crypto.pqap_hook_rwsem.
> + *
>    * Return 0 if we could handle the request inside KVM.
>    * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
>    */
> @@ -287,22 +290,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
>   		return -EOPNOTSUPP;
>   
>   	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
> -	mutex_lock(&matrix_dev->lock);
>   
>   	if (!vcpu->kvm->arch.crypto.pqap_hook)
>   		goto out_unlock;
>   	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
>   				   struct ap_matrix_mdev, pqap_hook);
>   
> -	/*
> -	 * If the KVM pointer is in the process of being set, wait until the
> -	 * process has completed.
> -	 */
> -	wait_event_cmd(matrix_mdev->wait_for_kvm,
> -		       !matrix_mdev->kvm_busy,
> -		       mutex_unlock(&matrix_dev->lock),
> -		       mutex_lock(&matrix_dev->lock));
> -
>   	/* If the there is no guest using the mdev, there is nothing to do */
>   	if (!matrix_mdev->kvm)
>   		goto out_unlock;
> @@ -323,7 +316,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
>   out_unlock:
>   	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
>   	vcpu->run->s.regs.gprs[1] >>= 32;
> -	mutex_unlock(&matrix_dev->lock);
>   	return 0;
>   }
>   
> @@ -350,10 +342,8 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev)
>   
>   	matrix_mdev->mdev = mdev;
>   	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
> -	init_waitqueue_head(&matrix_mdev->wait_for_kvm);
>   	mdev_set_drvdata(mdev, matrix_mdev);
> -	matrix_mdev->pqap_hook.hook = handle_pqap;
> -	matrix_mdev->pqap_hook.owner = THIS_MODULE;
> +	matrix_mdev->pqap_hook = handle_pqap;
>   	mutex_lock(&matrix_dev->lock);
>   	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
>   	mutex_unlock(&matrix_dev->lock);
> @@ -624,7 +614,7 @@ static ssize_t assign_adapter_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * un-assignment of adapter
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -697,7 +687,7 @@ static ssize_t unassign_adapter_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * un-assignment of adapter
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -787,7 +777,7 @@ static ssize_t assign_domain_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * assignment of domain
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -855,7 +845,7 @@ static ssize_t unassign_domain_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * un-assignment of domain
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -909,7 +899,7 @@ static ssize_t assign_control_domain_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * assignment of control domain.
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -968,7 +958,7 @@ static ssize_t unassign_control_domain_store(struct device *dev,
>   	 * If the KVM pointer is in flux or the guest is running, disallow
>   	 * un-assignment of control domain.
>   	 */
> -	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> +	if (matrix_mdev->kvm) {
>   		ret = -EBUSY;
>   		goto done;
>   	}
> @@ -1108,26 +1098,31 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
>   {
>   	struct ap_matrix_mdev *m;
>   
> -	if (kvm->arch.crypto.crycbd) {
> -		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> -			if (m != matrix_mdev && m->kvm == kvm)
> -				return -EPERM;
> -		}
> +	if (!kvm->arch.crypto.crycbd)
> +		return 0;
>   
> -		kvm_get_kvm(kvm);
> -		matrix_mdev->kvm_busy = true;
> -		mutex_unlock(&matrix_dev->lock);
> -		kvm_arch_crypto_set_masks(kvm,
> -					  matrix_mdev->matrix.apm,
> -					  matrix_mdev->matrix.aqm,
> -					  matrix_mdev->matrix.adm);
> -		mutex_lock(&matrix_dev->lock);
> -		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
> -		matrix_mdev->kvm = kvm;
> -		matrix_mdev->kvm_busy = false;
> -		wake_up_all(&matrix_mdev->wait_for_kvm);
> +	down_write(&kvm->arch.crypto.pqap_hook_rwsem);
> +	mutex_lock(&matrix_dev->lock);
> +
> +	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> +		if (m != matrix_mdev && m->kvm == kvm) {
> +			up_read(&kvm->arch.crypto.pqap_hook_rwsem);
> +			mutex_unlock(&matrix_dev->lock);
> +			return -EPERM;
> +		}
>   	}
>   
> +	kvm_get_kvm(kvm);
> +	matrix_mdev->kvm = kvm;
> +	kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
> +	mutex_unlock(&matrix_dev->lock);
> +
> +	kvm_arch_crypto_set_masks(kvm,
> +				  matrix_mdev->matrix.apm,
> +				  matrix_mdev->matrix.aqm,
> +				  matrix_mdev->matrix.adm);
> +	up_write(&kvm->arch.crypto.pqap_hook_rwsem);
> +
>   	return 0;
>   }
>   
> @@ -1164,6 +1159,7 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>    * vfio_ap_mdev_unset_kvm
>    *
>    * @matrix_mdev: a matrix mediated device
> + * @kvm: the KVM guest state object
>    *
>    * Performs clean-up of resources no longer needed by @matrix_mdev.
>    *
> @@ -1175,29 +1171,30 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>    * done under the @matrix_mdev->lock.
>    *
>    */
> -static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
> +static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
> +				   struct kvm *kvm)
>   {
> -	/*
> -	 * If the KVM pointer is in the process of being set, wait until the
> -	 * process has completed.
> -	 */
> -	wait_event_cmd(matrix_mdev->wait_for_kvm,
> -		       !matrix_mdev->kvm_busy,
> -		       mutex_unlock(&matrix_dev->lock),
> -		       mutex_lock(&matrix_dev->lock));
> +	if (!kvm)
> +		return;
>   
> -	if (matrix_mdev->kvm) {
> -		matrix_mdev->kvm_busy = true;
> +	down_write(&kvm->arch.crypto.pqap_hook_rwsem);
> +	mutex_lock(&matrix_dev->lock);
> +	if ((!matrix_mdev->kvm) || (!matrix_mdev->kvm->arch.crypto.crycbd)) {
> +		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
>   		mutex_unlock(&matrix_dev->lock);
> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> -		mutex_lock(&matrix_dev->lock);
> -		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> -		kvm_put_kvm(matrix_mdev->kvm);
> -		matrix_mdev->kvm = NULL;
> -		matrix_mdev->kvm_busy = false;
> -		wake_up_all(&matrix_mdev->wait_for_kvm);
> +		return;
>   	}
> +	mutex_unlock(&matrix_dev->lock);
> +
> +	kvm_arch_crypto_clear_masks(kvm);
> +
> +	mutex_lock(&matrix_dev->lock);
> +	vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> +	kvm_put_kvm(kvm);
> +	kvm->arch.crypto.pqap_hook = NULL;
> +	matrix_mdev->kvm = NULL;
> +	mutex_unlock(&matrix_dev->lock);
> +	up_write(&kvm->arch.crypto.pqap_hook_rwsem);
>   }
>   
>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> @@ -1209,16 +1206,13 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>   	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>   		return NOTIFY_OK;
>   
> -	mutex_lock(&matrix_dev->lock);
>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>   
>   	if (!data)
> -		vfio_ap_mdev_unset_kvm(matrix_mdev);
> +		vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
>   	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
>   		notify_rc = NOTIFY_DONE;
>   
> -	mutex_unlock(&matrix_dev->lock);
> -
>   	return notify_rc;
>   }
>   
> @@ -1352,14 +1346,12 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>   {
>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>   
> -	mutex_lock(&matrix_dev->lock);
> -	vfio_ap_mdev_unset_kvm(matrix_mdev);
> -	mutex_unlock(&matrix_dev->lock);
> -
>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>   				 &matrix_mdev->iommu_notifier);
>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>   				 &matrix_mdev->group_notifier);
> +
> +	vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
>   	module_put(THIS_MODULE);
>   }
>   
> @@ -1401,15 +1393,6 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
>   			break;
>   		}
>   
> -		/*
> -		 * If the KVM pointer is in the process of being set, wait until
> -		 * the process has completed.
> -		 */
> -		wait_event_cmd(matrix_mdev->wait_for_kvm,
> -			       !matrix_mdev->kvm_busy,
> -			       mutex_unlock(&matrix_dev->lock),
> -			       mutex_lock(&matrix_dev->lock));
> -
>   		ret = vfio_ap_mdev_reset_queues(mdev);
>   		break;
>   	default:
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index f82a6396acae..22d2e0ca3ae5 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -83,10 +83,8 @@ struct ap_matrix_mdev {
>   	struct ap_matrix matrix;
>   	struct notifier_block group_notifier;
>   	struct notifier_block iommu_notifier;
> -	bool kvm_busy;
> -	wait_queue_head_t wait_for_kvm;
>   	struct kvm *kvm;
> -	struct kvm_s390_module_hook pqap_hook;
> +	crypto_hook pqap_hook;
>   	struct mdev_device *mdev;
>   };
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ