lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Mon, 7 Feb 2022 19:22:15 +0200 From: Yishai Hadas <yishaih@...dia.com> To: <alex.williamson@...hat.com>, <bhelgaas@...gle.com>, <jgg@...dia.com>, <saeedm@...dia.com> CC: <linux-pci@...r.kernel.org>, <kvm@...r.kernel.org>, <netdev@...r.kernel.org>, <kuba@...nel.org>, <leonro@...dia.com>, <kwankhede@...dia.com>, <mgurtovoy@...dia.com>, <yishaih@...dia.com>, <maorg@...dia.com>, <ashok.raj@...el.com>, <kevin.tian@...el.com>, <shameerali.kolothum.thodi@...wei.com> Subject: [PATCH V7 mlx5-next 14/15] vfio/mlx5: Use its own PCI reset_done error handler Register its own handler for pci_error_handlers.reset_done and update state accordingly. Signed-off-by: Yishai Hadas <yishaih@...dia.com> Signed-off-by: Leon Romanovsky <leonro@...dia.com> Signed-off-by: Jason Gunthorpe <jgg@...dia.com> --- drivers/vfio/pci/mlx5/main.c | 57 ++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index acd205bcff70..63a889210ef3 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -28,9 +28,12 @@ struct mlx5vf_pci_core_device { struct vfio_pci_core_device core_device; u8 migrate_cap:1; + u8 deferred_reset:1; /* protect migration state */ struct mutex state_mutex; enum vfio_device_mig_state mig_state; + /* protect the reset_done flow */ + spinlock_t reset_lock; u16 vhca_id; struct mlx5_vf_migration_file *resuming_migf; struct mlx5_vf_migration_file *saving_migf; @@ -437,6 +440,25 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return ERR_PTR(-EINVAL); } +/* + * This function is called in all state_mutex unlock cases to + * handle a 'deferred_reset' if exists. + */ +static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) +{ +again: + spin_lock(&mvdev->reset_lock); + if (mvdev->deferred_reset) { + mvdev->deferred_reset = false; + spin_unlock(&mvdev->reset_lock); + mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + mlx5vf_disable_fds(mvdev); + goto again; + } + mutex_unlock(&mvdev->state_mutex); + spin_unlock(&mvdev->reset_lock); +} + static struct file * mlx5vf_pci_set_device_state(struct vfio_device *vdev, enum vfio_device_mig_state new_state) @@ -465,7 +487,7 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev, break; } } - mutex_unlock(&mvdev->state_mutex); + mlx5vf_state_mutex_unlock(mvdev); return res; } @@ -477,10 +499,34 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, mutex_lock(&mvdev->state_mutex); *curr_state = mvdev->mig_state; - mutex_unlock(&mvdev->state_mutex); + mlx5vf_state_mutex_unlock(mvdev); return 0; } +static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + + if (!mvdev->migrate_cap) + return; + + /* + * As the higher VFIO layers are holding locks across reset and using + * those same locks with the mm_lock we need to prevent ABBA deadlock + * with the state_mutex and mm_lock. + * In case the state_mutex was taken already we defer the cleanup work + * to the unlock flow of the other running context. + */ + spin_lock(&mvdev->reset_lock); + mvdev->deferred_reset = true; + if (!mutex_trylock(&mvdev->state_mutex)) { + spin_unlock(&mvdev->reset_lock); + return; + } + spin_unlock(&mvdev->reset_lock); + mlx5vf_state_mutex_unlock(mvdev); +} + static int mlx5vf_pci_open_device(struct vfio_device *core_vdev) { struct mlx5vf_pci_core_device *mvdev = container_of( @@ -562,6 +608,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; mutex_init(&mvdev->state_mutex); + spin_lock_init(&mvdev->reset_lock); } mlx5_vf_put_core_dev(mdev); } @@ -596,11 +643,17 @@ static const struct pci_device_id mlx5vf_pci_table[] = { MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table); +static const struct pci_error_handlers mlx5vf_err_handlers = { + .reset_done = mlx5vf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + static struct pci_driver mlx5vf_pci_driver = { .name = KBUILD_MODNAME, .id_table = mlx5vf_pci_table, .probe = mlx5vf_pci_probe, .remove = mlx5vf_pci_remove, + .err_handler = &mlx5vf_err_handlers, }; static void __exit mlx5vf_pci_cleanup(void) -- 2.18.1
Powered by blists - more mailing lists