Message-ID: <20201129025250.16eb8355.pasic@linux.ibm.com>
Date: Sun, 29 Nov 2020 02:52:50 +0100
From: Halil Pasic <pasic@...ux.ibm.com>
To: Tony Krowiak <akrowiak@...ux.ibm.com>
Cc: linux-s390@...r.kernel.org, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, freude@...ux.ibm.com, borntraeger@...ibm.com,
cohuck@...hat.com, mjrosato@...ux.ibm.com,
alex.williamson@...hat.com, kwankhede@...dia.com,
fiuczy@...ux.ibm.com, frankja@...ux.ibm.com, david@...hat.com,
hca@...ux.ibm.com, gor@...ux.ibm.com
Subject: Re: [PATCH v12 12/17] s390/vfio-ap: allow hot plug/unplug of AP
resources using mdev device
On Tue, 24 Nov 2020 16:40:11 -0500
Tony Krowiak <akrowiak@...ux.ibm.com> wrote:
> Let's hot plug/unplug adapters, domains and control domains assigned to or
> unassigned from an AP matrix mdev device while it is in use by a guest per
> the following rules:
>
> * Assign an adapter to mdev's matrix:
>
> The adapter will be hot plugged into the guest under the following
> conditions:
> 1. The adapter is not yet assigned to the guest's matrix
> 2. At least one domain is assigned to the guest's matrix
> 3. Each APQN derived from the APID of the newly assigned adapter and
> the APQIs of the domains already assigned to the guest's
> matrix references a queue device bound to the vfio_ap device driver.
>
> The adapter and each domain assigned to the mdev's matrix will be hot
> plugged into the guest under the following conditions:
> 1. The adapter is not yet assigned to the guest's matrix
> 2. No domains are assigned to the guest's matrix
> 3. At least one domain is assigned to the mdev's matrix
> 4. Each APQN derived from the APID of the newly assigned adapter and
> the APQIs of the domains assigned to the mdev's matrix references a
> queue device bound to the vfio_ap device driver.
>
> * Unassign an adapter from mdev's matrix:
>
> The adapter will be hot unplugged from the KVM guest if it is
> assigned to the guest's matrix.
>
> * Assign a domain to mdev's matrix:
>
> The domain will be hot plugged into the guest under the following
> conditions:
> 1. The domain is not yet assigned to the guest's matrix
> 2. At least one adapter is assigned to the guest's matrix
> 3. Each APQN derived from the APQI of the newly assigned domain and
> the APIDs of the adapters already assigned to the guest's
> matrix references a queue device bound to the vfio_ap device driver.
>
> The domain and each adapter assigned to the mdev's matrix will be hot
> plugged into the guest under the following conditions:
> 1. The domain is not yet assigned to the guest's matrix
> 2. No adapters are assigned to the guest's matrix
> 3. At least one adapter is assigned to the mdev's matrix
> 4. Each APQN derived from the APQI of the newly assigned domain and
> the APIDs of the adapters assigned to the mdev's matrix references a
> queue device bound to the vfio_ap device driver.
>
> * Unassign a domain from mdev's matrix:
>
> The domain will be hot unplugged from the KVM guest if it is
> assigned to the guest's matrix.
>
> * Assign a control domain:
>
> The control domain will be hot plugged into the KVM guest if it is not
> already assigned to the guest's APCB. The AP architecture ensures a
> guest only gets access to a control domain that is in the host's AP
> configuration, so there is no risk in hot plugging it; it simply
> becomes available to the guest automatically once it is added to the
> host configuration.
>
> * Unassign a control domain:
>
> The control domain will be hot unplugged from the KVM guest if it is
> assigned to the guest's APCB.
This is where things start getting tricky. E.g. do we need to revise the
filtering after an unassign? (For example, an assign_adapter X didn't
change the shadow because queue XY was missing, but now we unplug domain
Y. Should adapter X pop up? I guess it should.)
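Something along the lines of the following is what I have in mind for
re-doing the filtering (just a rough sketch to illustrate the question;
the helper name is made up and I did not try to compile it):

/* rough sketch only, helper name is made up */
static void vfio_ap_mdev_refilter_apcb(struct ap_matrix_mdev *matrix_mdev)
{
        unsigned long apid, apqi;

        /* start from what the user assigned to the mdev... */
        bitmap_copy(matrix_mdev->shadow_apcb.apm, matrix_mdev->matrix.apm,
                    AP_DEVICES);
        bitmap_copy(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm,
                    AP_DOMAINS);

        /* ...and drop each APID that has an APQN not bound to vfio_ap */
        for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
                for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
                                     AP_DOMAINS) {
                        if (!vfio_ap_mdev_get_queue(matrix_mdev,
                                                    AP_MKQID(apid, apqi))) {
                                clear_bit_inv(apid,
                                              matrix_mdev->shadow_apcb.apm);
                                break;
                        }
                }
        }

        vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
}

called from both the assign and the unassign paths. Whether filtering by
APID only is the right granularity is of course a question of its own.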
>
> Note: Now that hot plug/unplug is implemented, an assignment or
> unassignment of an adapter, domain or control domain can be
> initiated while the guest is starting, so the matrix device lock is
> taken in the group notification callback that initializes the
> guest's APCB when the KVM pointer is made available to the vfio_ap
> device driver.
>
> Signed-off-by: Tony Krowiak <akrowiak@...ux.ibm.com>
> ---
> drivers/s390/crypto/vfio_ap_ops.c | 190 +++++++++++++++++++++++++-----
> 1 file changed, 159 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 586ec5776693..4f96b7861607 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -631,6 +631,60 @@ static void vfio_ap_mdev_manage_qlinks(struct ap_matrix_mdev *matrix_mdev,
> }
> }
>
> +static bool vfio_ap_assign_apid_to_apcb(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apid)
> +{
> + unsigned long apqi, apqn;
> + unsigned long *aqm = matrix_mdev->shadow_apcb.aqm;
> +
> + /*
> + * If the APID is already assigned to the guest's shadow APCB, there is
> + * no need to assign it.
> + */
> + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm))
> + return false;
> +
> + /*
> + * If no domains have yet been assigned to the shadow APCB and one or
> + * more domains have been assigned to the matrix mdev, then use
> + * the domains assigned to the matrix mdev; otherwise, there is nothing
> + * to assign to the shadow APCB.
> + */
> + if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS)) {
> + if (bitmap_empty(matrix_mdev->matrix.aqm, AP_DOMAINS))
> + return false;
> +
> + aqm = matrix_mdev->matrix.aqm;
> + }
> +
> + /* Make sure all APQNs are bound to the vfio_ap driver */
> + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
> + apqn = AP_MKQID(apid, apqi);
> +
> + if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL)
> + return false;
> + }
> +
> + set_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
> +
> + /*
> + * If we verified APQNs using the domains assigned to the matrix mdev,
> + * then copy the APQIs of those domains into the guest's APCB
> + */
> + if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS))
> + bitmap_copy(matrix_mdev->shadow_apcb.aqm,
> + matrix_mdev->matrix.aqm, AP_DOMAINS);
> +
> + return true;
> +}
What is the rationale behind the special handling of an empty shadow
aqm? I.e. why not simply:
static bool vfio_ap_assign_apid_to_apcb(struct ap_matrix_mdev *matrix_mdev,
                                        unsigned long apid)
{
        unsigned long apqi, apqn;
        unsigned long *aqm = matrix_mdev->shadow_apcb.aqm;

        /*
         * If the APID is already assigned to the guest's shadow APCB, there is
         * no need to assign it.
         */
        if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm))
                return false;

        /* Make sure all APQNs are bound to the vfio_ap driver */
        for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
                apqn = AP_MKQID(apid, apqi);

                if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL)
                        return false;
        }

        set_bit_inv(apid, matrix_mdev->shadow_apcb.apm);

        return true;
}
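
If I read the two variants right, the practical difference boils down to
something like this (assuming the shadow aqm can end up empty while the
mdev already has a domain assigned, e.g. because of the filtering):

        # guest running, mdev has domain 0x06 assigned, shadow aqm empty
        echo 0x04 > assign_adapter
        # with the patch: adapter 0x04 and domain 0x06 both get plugged in
        # with the variant above: only APID 0x04 is set and the aqm stays
        # empty until a subsequent assign_domain

i.e. with the patch an assign_adapter can implicitly plug in domains as
well.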
Please answer the questions I've asked, and note that I will have to
return to this patch later.
Regards,
Halil
> +
> +static void vfio_ap_mdev_hot_plug_adapter(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apid)
> +{
> + if (vfio_ap_assign_apid_to_apcb(matrix_mdev, apid))
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> +}
> +
> /**
> * assign_adapter_store
> *
> @@ -673,10 +727,6 @@ static ssize_t assign_adapter_store(struct device *dev,
> struct mdev_device *mdev = mdev_from_dev(dev);
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - /* If the guest is running, disallow assignment of adapter */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &apid);
> if (ret)
> return ret;
> @@ -698,12 +748,22 @@ static ssize_t assign_adapter_store(struct device *dev,
> }
> set_bit_inv(apid, matrix_mdev->matrix.apm);
> vfio_ap_mdev_manage_qlinks(matrix_mdev, LINK_APID, apid);
> + vfio_ap_mdev_hot_plug_adapter(matrix_mdev, apid);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> }
> static DEVICE_ATTR_WO(assign_adapter);
>
> +static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apid)
> +{
> + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
> + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> + }
> +}
> +
> /**
> * unassign_adapter_store
> *
> @@ -730,10 +790,6 @@ static ssize_t unassign_adapter_store(struct device *dev,
> struct mdev_device *mdev = mdev_from_dev(dev);
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - /* If the guest is running, disallow un-assignment of adapter */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &apid);
> if (ret)
> return ret;
> @@ -744,12 +800,67 @@ static ssize_t unassign_adapter_store(struct device *dev,
> mutex_lock(&matrix_dev->lock);
> clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
> vfio_ap_mdev_manage_qlinks(matrix_mdev, UNLINK_APID, apid);
> + vfio_ap_mdev_hot_unplug_adapter(matrix_mdev, apid);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> }
> static DEVICE_ATTR_WO(unassign_adapter);
>
> +static bool vfio_ap_assign_apqi_to_apcb(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apqi)
> +{
> + unsigned long apid, apqn;
> + unsigned long *apm = matrix_mdev->shadow_apcb.apm;
> +
> + /*
> + * If the APQI is already assigned to the guest's shadow APCB, there is
> + * no need to assign it.
> + */
> + if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
> + return false;
> +
> + /*
> + * If no adapters have yet been assigned to the shadow APCB and one or
> + * more adapters have been assigned to the matrix mdev, then use
> + * the adapters assigned to the matrix mdev; otherwise, there is nothing
> + * to assign to the shadow APCB.
> + */
> + if (bitmap_empty(matrix_mdev->shadow_apcb.apm, AP_DEVICES)) {
> + if (bitmap_empty(matrix_mdev->matrix.apm, AP_DEVICES))
> + return false;
> +
> + apm = matrix_mdev->matrix.apm;
> + }
> +
> + /* Make sure all APQNs are bound to the vfio_ap driver */
> + for_each_set_bit_inv(apid, apm, AP_DEVICES) {
> + apqn = AP_MKQID(apid, apqi);
> +
> + if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL)
> + return false;
> + }
> +
> + set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
> +
> + /*
> + * If we verified APQNs using the adapters assigned to the matrix mdev,
> + * then copy the APIDs of those adapters into the guest's APCB
> + */
> + if (bitmap_empty(matrix_mdev->shadow_apcb.apm, AP_DEVICES))
> + bitmap_copy(matrix_mdev->shadow_apcb.apm,
> + matrix_mdev->matrix.apm, AP_DEVICES);
> +
> + return true;
> +}
> +
> +static void vfio_ap_mdev_hot_plug_domain(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apqi)
> +{
> + if (vfio_ap_assign_apqi_to_apcb(matrix_mdev, apqi))
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> +}
> +
> /**
> * assign_domain_store
> *
> @@ -793,10 +904,6 @@ static ssize_t assign_domain_store(struct device *dev,
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
>
> - /* If the guest is running, disallow assignment of domain */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &apqi);
> if (ret)
> return ret;
> @@ -817,12 +924,21 @@ static ssize_t assign_domain_store(struct device *dev,
> }
> set_bit_inv(apqi, matrix_mdev->matrix.aqm);
> vfio_ap_mdev_manage_qlinks(matrix_mdev, LINK_APQI, apqi);
> + vfio_ap_mdev_hot_plug_domain(matrix_mdev, apqi);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> }
> static DEVICE_ATTR_WO(assign_domain);
>
> +static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long apqi)
> +{
> + if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
> + clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> + }
> +}
>
> /**
> * unassign_domain_store
> @@ -850,10 +966,6 @@ static ssize_t unassign_domain_store(struct device *dev,
> struct mdev_device *mdev = mdev_from_dev(dev);
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - /* If the guest is running, disallow un-assignment of domain */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &apqi);
> if (ret)
> return ret;
> @@ -864,12 +976,22 @@ static ssize_t unassign_domain_store(struct device *dev,
> mutex_lock(&matrix_dev->lock);
> clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
> vfio_ap_mdev_manage_qlinks(matrix_mdev, UNLINK_APQI, apqi);
> + vfio_ap_mdev_hot_unplug_domain(matrix_mdev, apqi);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> }
> static DEVICE_ATTR_WO(unassign_domain);
>
> +static void vfio_ap_mdev_hot_plug_ctl_domain(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long domid)
> +{
> + if (!test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) {
> + set_bit_inv(domid, matrix_mdev->shadow_apcb.adm);
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> + }
> +}
> +
> /**
> * assign_control_domain_store
> *
> @@ -895,10 +1017,6 @@ static ssize_t assign_control_domain_store(struct device *dev,
> struct mdev_device *mdev = mdev_from_dev(dev);
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - /* If the guest is running, disallow assignment of control domain */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &id);
> if (ret)
> return ret;
> @@ -914,12 +1032,23 @@ static ssize_t assign_control_domain_store(struct device *dev,
> if (!mutex_trylock(&matrix_dev->lock))
> return -EBUSY;
> set_bit_inv(id, matrix_mdev->matrix.adm);
> + vfio_ap_mdev_hot_plug_ctl_domain(matrix_mdev, id);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> }
> static DEVICE_ATTR_WO(assign_control_domain);
>
> +static void
> +vfio_ap_mdev_hot_unplug_ctl_domain(struct ap_matrix_mdev *matrix_mdev,
> + unsigned long domid)
> +{
> + if (test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) {
> + clear_bit_inv(domid, matrix_mdev->shadow_apcb.adm);
> + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
> + }
> +}
> +
> /**
> * unassign_control_domain_store
> *
> @@ -946,10 +1075,6 @@ static ssize_t unassign_control_domain_store(struct device *dev,
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> unsigned long max_domid = matrix_mdev->matrix.adm_max;
>
> - /* If the guest is running, disallow un-assignment of control domain */
> - if (matrix_mdev->kvm)
> - return -EBUSY;
> -
> ret = kstrtoul(buf, 0, &domid);
> if (ret)
> return ret;
> @@ -958,6 +1083,7 @@ static ssize_t unassign_control_domain_store(struct device *dev,
>
> mutex_lock(&matrix_dev->lock);
> clear_bit_inv(domid, matrix_mdev->matrix.adm);
> + vfio_ap_mdev_hot_unplug_ctl_domain(matrix_mdev, domid);
> mutex_unlock(&matrix_dev->lock);
>
> return count;
> @@ -1099,8 +1225,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> {
> struct ap_matrix_mdev *m;
>
> - mutex_lock(&matrix_dev->lock);
> -
> list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> if ((m != matrix_mdev) && (m->kvm == kvm)) {
> mutex_unlock(&matrix_dev->lock);
> @@ -1111,7 +1235,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> matrix_mdev->kvm = kvm;
> kvm_get_kvm(kvm);
> kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
> - mutex_unlock(&matrix_dev->lock);
>
> return 0;
> }
> @@ -1148,7 +1271,7 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> unsigned long action, void *data)
> {
> - int ret;
> + int ret = NOTIFY_DONE;
> struct ap_matrix_mdev *matrix_mdev;
>
> if (action != VFIO_GROUP_NOTIFY_SET_KVM)
> @@ -1156,23 +1279,28 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>
> matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>
> + mutex_lock(&matrix_dev->lock);
> +
> if (!data) {
> if (matrix_mdev->kvm)
> kvm_put_kvm(matrix_mdev->kvm);
>
> matrix_mdev->kvm = NULL;
>
> - return NOTIFY_OK;
> + ret = NOTIFY_OK;
> + goto done;
> }
>
> ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
> if (ret)
> - return NOTIFY_DONE;
> + goto done;
>
> vfio_ap_mdev_init_apcb(matrix_mdev);
> vfio_ap_mdev_commit_shadow_apcb(matrix_mdev);
>
> - return NOTIFY_OK;
> +done:
> + mutex_unlock(&matrix_dev->lock);
> + return ret;
> }
>
> static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,