[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4d05a8f4-d2e9-bc54-3e9b-6becc3281f0f@linux.ibm.com>
Date: Fri, 27 May 2022 09:50:50 -0400
From: "Jason J. Herne" <jjherne@...ux.ibm.com>
To: Tony Krowiak <akrowiak@...ux.ibm.com>, linux-s390@...r.kernel.org,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc: freude@...ux.ibm.com, borntraeger@...ibm.com, cohuck@...hat.com,
mjrosato@...ux.ibm.com, pasic@...ux.ibm.com,
alex.williamson@...hat.com, kwankhede@...dia.com,
fiuczy@...ux.ibm.com
Subject: Re: [PATCH v19 11/20] s390/vfio-ap: prepare for dynamic update of
guest's APCB on queue probe/remove
On 4/4/22 18:10, Tony Krowiak wrote:
> The callback functions for probing and removing a queue device must take
> and release the locks required to perform a dynamic update of a guest's
> APCB in the proper order.
>
> The proper order for taking the locks is:
>
> matrix_dev->guests_lock => kvm->lock => matrix_dev->mdevs_lock
>
> The proper order for releasing the locks is:
>
> matrix_dev->mdevs_lock => kvm->lock => matrix_dev->guests_lock
>
> A new helper function is introduced to be used by the probe callback to
> acquire the required locks. Since the probe callback only has
> access to a queue device when it is called, the helper function will find
> the ap_matrix_mdev object to which the queue device's APQN is assigned and
> return it so the KVM guest to which the mdev is attached can be dynamically
> updated.
>
> Note that in order to find the ap_matrix_mdev (matrix_mdev) object, it is
> necessary to search the matrix_dev->mdev_list. This presents a
> locking order dilemma because the matrix_dev->mdevs_lock can't be taken to
> protect against changes to the list while searching for the matrix_mdev to
> which a queue device's APQN is assigned. This is due to the fact that the
> proper locking order requires that the matrix_dev->mdevs_lock be taken
> after both the matrix_mdev->kvm->lock and the matrix_dev->mdevs_lock.
> Consequently, the matrix_dev->guests_lock will be used to protect against
> removal of a matrix_mdev object from the list while a queue device is
> being probed. This necessitates changes to the mdev probe/remove
> callback functions to take the matrix_dev->guests_lock prior to removing
> a matrix_mdev object from the list.
>
> A new macro is also introduced to acquire the locks required to dynamically
> update the guest's APCB in the proper order when a queue device is
> removed.
>
> Signed-off-by: Tony Krowiak <akrowiak@...ux.ibm.com>
> ---
> drivers/s390/crypto/vfio_ap_ops.c | 126 +++++++++++++++++++++---------
> 1 file changed, 88 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 2219b1069ceb..080a733f7cd2 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -116,6 +116,74 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
> mutex_unlock(&matrix_dev->guests_lock); \
> })
>
> +/**
> + * vfio_ap_mdev_get_update_locks_for_apqn: retrieve the matrix mdev to which an
> + * APQN is assigned and acquire the
> + * locks required to update the APCB of
> + * the KVM guest to which the mdev is
> + * attached.
> + *
> + * @apqn: the APQN of a queue device.
> + *
> + * The proper locking order is:
> + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM
> + * guest's APCB.
> + * 2. matrix_mdev->kvm->lock: required to update a guest's APCB
> + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev
> + *
> + * Note: If @apqn is not assigned to a matrix_mdev, the matrix_mdev->kvm->lock
> + * will not be taken.
> + *
> + * Return: the ap_matrix_mdev object to which @apqn is assigned or NULL if @apqn
> + * is not assigned to an ap_matrix_mdev.
> + */
> +static struct ap_matrix_mdev *vfio_ap_mdev_get_update_locks_for_apqn(int apqn)
> +{
> + struct ap_matrix_mdev *matrix_mdev;
> +
> + mutex_lock(&matrix_dev->guests_lock);
> +
> + list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
> + if (test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm) &&
> + test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) {
> + if (matrix_mdev->kvm)
> + mutex_lock(&matrix_mdev->kvm->lock);
> +
> + mutex_lock(&matrix_dev->mdevs_lock);
> +
> + return matrix_mdev;
> + }
> + }
> +
> + mutex_lock(&matrix_dev->mdevs_lock);
> +
> + return NULL;
> +}
> +
> +/**
> + * get_update_locks_for_queue: get the locks required to update the APCB of the
> + * KVM guest to which the matrix mdev linked to a
> + * vfio_ap_queue object is attached.
> + *
> + * @queue: a pointer to a vfio_ap_queue object.
> + *
> + * The proper locking order is:
> + * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM
> + * guest's APCB.
> + * 2. queue->matrix_mdev->kvm->lock: required to update a guest's APCB
> + * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev
> + *
> + * Note: if @queue is not linked to an ap_matrix_mdev object, the KVM lock
> + * will not be taken.
> + */
> +#define get_update_locks_for_queue(queue) ({ \
> + struct ap_matrix_mdev *matrix_mdev = q->matrix_mdev; \
> + mutex_lock(&matrix_dev->guests_lock); \
> + if (matrix_mdev && matrix_mdev->kvm) \
> + mutex_lock(&matrix_mdev->kvm->lock); \
> + mutex_lock(&matrix_dev->mdevs_lock); \
> +})
> +
One more comment I forgot to include before:
This macro is far too similar to existing macro, get_update_locks_for_mdev. And it is only
called in one place. Let's remove this and replace the single invocation with:
get_update_locks_for_mdev(q->matrix_mdev);
> /**
> * vfio_ap_mdev_get_queue - retrieve a queue with a specific APQN from a
> * hash table of queues assigned to a matrix mdev
> @@ -615,21 +683,18 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
> matrix_mdev->pqap_hook = handle_pqap;
> vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
> hash_init(matrix_mdev->qtable.queues);
> - mdev_set_drvdata(mdev, matrix_mdev);
> - mutex_lock(&matrix_dev->mdevs_lock);
> - list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
> - mutex_unlock(&matrix_dev->mdevs_lock);
>
> ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
> if (ret)
> goto err_list;
> + mdev_set_drvdata(mdev, matrix_mdev);
> + mutex_lock(&matrix_dev->mdevs_lock);
> + list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
> + mutex_unlock(&matrix_dev->mdevs_lock);
> dev_set_drvdata(&mdev->dev, matrix_mdev);
> return 0;
>
> err_list:
> - mutex_lock(&matrix_dev->mdevs_lock);
> - list_del(&matrix_mdev->node);
> - mutex_unlock(&matrix_dev->mdevs_lock);
> vfio_uninit_group_dev(&matrix_mdev->vdev);
> kfree(matrix_mdev);
> err_dec_available:
> @@ -692,11 +757,13 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
>
> vfio_unregister_group_dev(&matrix_mdev->vdev);
>
> + mutex_lock(&matrix_dev->guests_lock);
> mutex_lock(&matrix_dev->mdevs_lock);
> vfio_ap_mdev_reset_queues(matrix_mdev);
> vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
> list_del(&matrix_mdev->node);
> mutex_unlock(&matrix_dev->mdevs_lock);
> + mutex_unlock(&matrix_dev->guests_lock);
> vfio_uninit_group_dev(&matrix_mdev->vdev);
> kfree(matrix_mdev);
> atomic_inc(&matrix_dev->available_instances);
> @@ -1665,49 +1732,30 @@ void vfio_ap_mdev_unregister(void)
> mdev_unregister_driver(&vfio_ap_matrix_driver);
> }
>
> -/*
> - * vfio_ap_queue_link_mdev
> - *
> - * @q: The queue to link with the matrix mdev.
> - *
> - * Links @q with the matrix mdev to which the queue's APQN is assigned.
> - */
> -static void vfio_ap_queue_link_mdev(struct vfio_ap_queue *q)
> -{
> - unsigned long apid = AP_QID_CARD(q->apqn);
> - unsigned long apqi = AP_QID_QUEUE(q->apqn);
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
> - if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
> - test_bit_inv(apqi, matrix_mdev->matrix.aqm)) {
> - vfio_ap_mdev_link_queue(matrix_mdev, q);
> - break;
> - }
> - }
> -}
> -
> int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
> {
> struct vfio_ap_queue *q;
> + struct ap_matrix_mdev *matrix_mdev;
> DECLARE_BITMAP(apm_delta, AP_DEVICES);
>
> q = kzalloc(sizeof(*q), GFP_KERNEL);
> if (!q)
> return -ENOMEM;
> - mutex_lock(&matrix_dev->mdevs_lock);
> q->apqn = to_ap_queue(&apdev->device)->qid;
> q->saved_isc = VFIO_AP_ISC_INVALID;
> - vfio_ap_queue_link_mdev(q);
> - if (q->matrix_mdev) {
> +
> + matrix_mdev = vfio_ap_mdev_get_update_locks_for_apqn(q->apqn);
> +
> + if (matrix_mdev) {
> + vfio_ap_mdev_link_queue(matrix_mdev, q);
> memset(apm_delta, 0, sizeof(apm_delta));
> set_bit_inv(AP_QID_CARD(q->apqn), apm_delta);
> vfio_ap_mdev_filter_matrix(apm_delta,
> - q->matrix_mdev->matrix.aqm,
> - q->matrix_mdev);
> + matrix_mdev->matrix.aqm,
> + matrix_mdev);
> }
> dev_set_drvdata(&apdev->device, q);
> - mutex_unlock(&matrix_dev->mdevs_lock);
> + release_update_locks_for_mdev(matrix_mdev);
>
> return 0;
> }
> @@ -1716,11 +1764,13 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
> {
> unsigned long apid;
> struct vfio_ap_queue *q;
> + struct ap_matrix_mdev *matrix_mdev;
>
> - mutex_lock(&matrix_dev->mdevs_lock);
> q = dev_get_drvdata(&apdev->device);
> + get_update_locks_for_queue(q);
> + matrix_mdev = q->matrix_mdev;
>
> - if (q->matrix_mdev) {
> + if (matrix_mdev) {
> vfio_ap_unlink_queue_fr_mdev(q);
>
> apid = AP_QID_CARD(q->apqn);
> @@ -1731,5 +1781,5 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
> vfio_ap_mdev_reset_queue(q, 1);
> dev_set_drvdata(&apdev->device, NULL);
> kfree(q);
> - mutex_unlock(&matrix_dev->mdevs_lock);
> + release_update_locks_for_mdev(matrix_mdev);
> }
--
-- Jason J. Herne (jjherne@...ux.ibm.com)
Powered by blists - more mailing lists