[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231207154327.4bd74c98.alex.williamson@redhat.com>
Date: Thu, 7 Dec 2023 15:43:27 -0700
From: Alex Williamson <alex.williamson@...hat.com>
To: Yahui Cao <yahui.cao@...el.com>
Cc: intel-wired-lan@...ts.osuosl.org, kvm@...r.kernel.org,
netdev@...r.kernel.org, lingyu.liu@...el.com, kevin.tian@...el.com,
madhu.chittim@...el.com, sridhar.samudrala@...el.com, jgg@...dia.com,
yishaih@...dia.com, shameerali.kolothum.thodi@...wei.com,
brett.creeley@....com, davem@...emloft.net, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com
Subject: Re: [PATCH iwl-next v4 12/12] vfio/ice: Implement vfio_pci driver
for E800 devices
On Tue, 21 Nov 2023 02:51:11 +0000
Yahui Cao <yahui.cao@...el.com> wrote:
> From: Lingyu Liu <lingyu.liu@...el.com>
>
> Add a vendor-specific vfio_pci driver for E800 devices.
>
> It uses vfio_pci_core to register to the VFIO subsystem and then
> implements the E800 specific logic to support VF live migration.
>
> It implements the device state transition flow for live
> migration.
>
> Signed-off-by: Lingyu Liu <lingyu.liu@...el.com>
> Signed-off-by: Yahui Cao <yahui.cao@...el.com>
> ---
> MAINTAINERS | 7 +
> drivers/vfio/pci/Kconfig | 2 +
> drivers/vfio/pci/Makefile | 2 +
> drivers/vfio/pci/ice/Kconfig | 10 +
> drivers/vfio/pci/ice/Makefile | 4 +
> drivers/vfio/pci/ice/ice_vfio_pci.c | 707 ++++++++++++++++++++++++++++
> 6 files changed, 732 insertions(+)
> create mode 100644 drivers/vfio/pci/ice/Kconfig
> create mode 100644 drivers/vfio/pci/ice/Makefile
> create mode 100644 drivers/vfio/pci/ice/ice_vfio_pci.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 97f51d5ec1cf..c8faf7fe1bd1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22860,6 +22860,13 @@ L: kvm@...r.kernel.org
> S: Maintained
> F: drivers/vfio/pci/mlx5/
>
> +VFIO ICE PCI DRIVER
> +M: Yahui Cao <yahui.cao@...el.com>
> +M: Lingyu Liu <lingyu.liu@...el.com>
> +L: kvm@...r.kernel.org
> +S: Maintained
> +F: drivers/vfio/pci/ice/
> +
> VFIO PCI DEVICE SPECIFIC DRIVERS
> R: Jason Gunthorpe <jgg@...dia.com>
> R: Yishai Hadas <yishaih@...dia.com>
> diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
> index 8125e5f37832..6618208947af 100644
> --- a/drivers/vfio/pci/Kconfig
> +++ b/drivers/vfio/pci/Kconfig
> @@ -65,4 +65,6 @@ source "drivers/vfio/pci/hisilicon/Kconfig"
>
> source "drivers/vfio/pci/pds/Kconfig"
>
> +source "drivers/vfio/pci/ice/Kconfig"
> +
> endmenu
> diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
> index 45167be462d8..fc1df82df3ac 100644
> --- a/drivers/vfio/pci/Makefile
> +++ b/drivers/vfio/pci/Makefile
> @@ -13,3 +13,5 @@ obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
> obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
>
> obj-$(CONFIG_PDS_VFIO_PCI) += pds/
> +
> +obj-$(CONFIG_ICE_VFIO_PCI) += ice/
> diff --git a/drivers/vfio/pci/ice/Kconfig b/drivers/vfio/pci/ice/Kconfig
> new file mode 100644
> index 000000000000..0b8cd1489073
> --- /dev/null
> +++ b/drivers/vfio/pci/ice/Kconfig
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config ICE_VFIO_PCI
> + tristate "VFIO support for Intel(R) Ethernet Connection E800 Series"
> + depends on ICE
> + select VFIO_PCI_CORE
> + help
> + This provides migration support for Intel(R) Ethernet connection E800
> + series devices using the VFIO framework.
> +
> + If you don't know what to do here, say N.
> diff --git a/drivers/vfio/pci/ice/Makefile b/drivers/vfio/pci/ice/Makefile
> new file mode 100644
> index 000000000000..259d4ab89105
> --- /dev/null
> +++ b/drivers/vfio/pci/ice/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +obj-$(CONFIG_ICE_VFIO_PCI) += ice-vfio-pci.o
> +ice-vfio-pci-y := ice_vfio_pci.o
> +
> diff --git a/drivers/vfio/pci/ice/ice_vfio_pci.c b/drivers/vfio/pci/ice/ice_vfio_pci.c
> new file mode 100644
> index 000000000000..28a181aa2f3f
> --- /dev/null
> +++ b/drivers/vfio/pci/ice/ice_vfio_pci.c
> @@ -0,0 +1,707 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (C) 2018-2023 Intel Corporation */
> +
> +#include <linux/device.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/file.h>
> +#include <linux/pci.h>
> +#include <linux/vfio_pci_core.h>
> +#include <linux/net/intel/ice_migration.h>
> +#include <linux/anon_inodes.h>
> +
> +#define DRIVER_DESC "ICE VFIO PCI - User Level meta-driver for Intel E800 device family"
> +
> +struct ice_vfio_pci_migration_file {
> + struct file *filp;
> + struct mutex lock; /* protect migration file access */
> + bool disabled;
> +
> + u8 mig_data[SZ_128K];
> + size_t total_length;
> +};
> +
> +struct ice_vfio_pci_core_device {
> + struct vfio_pci_core_device core_device;
> + u8 deferred_reset:1;
Move vf_id here to use some of the hole this leaves.
> + struct mutex state_mutex; /* protect migration state */
> + enum vfio_device_mig_state mig_state;
> + /* protect the reset_done flow */
> + spinlock_t reset_lock;
> + struct ice_vfio_pci_migration_file *resuming_migf;
> + struct ice_vfio_pci_migration_file *saving_migf;
> + struct vfio_device_migration_info mig_info;
> + u8 *mig_data;
> + struct ice_pf *pf;
> + int vf_id;
> +};
> +
> +/**
> + * ice_vfio_pci_load_state - VFIO device state reloading
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + *
> + * Load device state. This function is called when the userspace VFIO uAPI
> + * consumer wants to load the device state info from VFIO migration region and
> + * load them into the device. This function should make sure all the device
> + * state info is loaded successfully. As a result, return value is mandatory
> + * to be checked.
> + *
> + * Return 0 for success, negative value for failure.
> + */
> +static int __must_check
> +ice_vfio_pci_load_state(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + struct ice_vfio_pci_migration_file *migf = ice_vdev->resuming_migf;
> +
> + return ice_migration_load_devstate(ice_vdev->pf,
> + ice_vdev->vf_id,
> + migf->mig_data,
> + migf->total_length);
> +}
> +
> +/**
> + * ice_vfio_pci_save_state - VFIO device state saving
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + * @migf: pointer to migration file
> + *
> + * Snapshot the device state and save it. This function is called when the
> + * VFIO uAPI consumer wants to snapshot the current device state and saves
> + * it into the VFIO migration region. This function should make sure all
> + * of the device state info is collectted and saved successfully. As a
> + * result, return value is mandatory to be checked.
> + *
> + * Return 0 for success, negative value for failure.
> + */
> +static int __must_check
> +ice_vfio_pci_save_state(struct ice_vfio_pci_core_device *ice_vdev,
> + struct ice_vfio_pci_migration_file *migf)
> +{
> + migf->total_length = SZ_128K;
> +
> + return ice_migration_save_devstate(ice_vdev->pf,
> + ice_vdev->vf_id,
> + migf->mig_data,
> + migf->total_length);
> +}
> +
> +/**
> + * ice_vfio_migration_init - Initialization for live migration function
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + *
> + * Returns 0 on success, negative value on error
> + */
> +static int ice_vfio_migration_init(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + struct pci_dev *pdev = ice_vdev->core_device.pdev;
> +
> + ice_vdev->pf = ice_migration_get_pf(pdev);
> + if (!ice_vdev->pf)
> + return -EFAULT;
> +
> + ice_vdev->vf_id = pci_iov_vf_id(pdev);
> + if (ice_vdev->vf_id < 0)
> + return -EINVAL;
> +
> + return ice_migration_init_dev(ice_vdev->pf, ice_vdev->vf_id);
> +}
> +
> +/**
> + * ice_vfio_migration_uninit - Cleanup for live migration function
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + */
> +static void ice_vfio_migration_uninit(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + ice_migration_uninit_dev(ice_vdev->pf, ice_vdev->vf_id);
> +}
> +
> +/**
> + * ice_vfio_pci_disable_fd - Close migration file
> + * @migf: pointer to ice vfio pci migration file
> + */
> +static void ice_vfio_pci_disable_fd(struct ice_vfio_pci_migration_file *migf)
> +{
> + mutex_lock(&migf->lock);
> + migf->disabled = true;
> + migf->total_length = 0;
> + migf->filp->f_pos = 0;
> + mutex_unlock(&migf->lock);
> +}
> +
> +/**
> + * ice_vfio_pci_disable_fds - Close migration files of ice vfio pci device
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + */
> +static void ice_vfio_pci_disable_fds(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + if (ice_vdev->resuming_migf) {
> + ice_vfio_pci_disable_fd(ice_vdev->resuming_migf);
> + fput(ice_vdev->resuming_migf->filp);
> + ice_vdev->resuming_migf = NULL;
> + }
> + if (ice_vdev->saving_migf) {
> + ice_vfio_pci_disable_fd(ice_vdev->saving_migf);
> + fput(ice_vdev->saving_migf->filp);
> + ice_vdev->saving_migf = NULL;
> + }
> +}
> +
> +/*
> + * This function is called in all state_mutex unlock cases to
> + * handle a 'deferred_reset' if exists.
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + */
> +static void
> +ice_vfio_pci_state_mutex_unlock(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> +again:
> + spin_lock(&ice_vdev->reset_lock);
> + if (ice_vdev->deferred_reset) {
> + ice_vdev->deferred_reset = false;
> + spin_unlock(&ice_vdev->reset_lock);
> + ice_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
> + ice_vfio_pci_disable_fds(ice_vdev);
> + goto again;
> + }
> + mutex_unlock(&ice_vdev->state_mutex);
> + spin_unlock(&ice_vdev->reset_lock);
> +}
> +
> +static void ice_vfio_pci_reset_done(struct pci_dev *pdev)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev =
> + (struct ice_vfio_pci_core_device *)dev_get_drvdata(&pdev->dev);
> +
> + /*
> + * As the higher VFIO layers are holding locks across reset and using
> + * those same locks with the mm_lock we need to prevent ABBA deadlock
> + * with the state_mutex and mm_lock.
> + * In case the state_mutex was taken already we defer the cleanup work
> + * to the unlock flow of the other running context.
> + */
> + spin_lock(&ice_vdev->reset_lock);
> + ice_vdev->deferred_reset = true;
> + if (!mutex_trylock(&ice_vdev->state_mutex)) {
> + spin_unlock(&ice_vdev->reset_lock);
> + return;
> + }
> + spin_unlock(&ice_vdev->reset_lock);
> + ice_vfio_pci_state_mutex_unlock(ice_vdev);
> +}
> +
> +/**
> + * ice_vfio_pci_open_device - Called when a vfio device is probed by VFIO UAPI
> + * @core_vdev: the vfio device to open
> + *
> + * Initialization of the vfio device
> + *
> + * Returns 0 on success, negative value on error
> + */
> +static int ice_vfio_pci_open_device(struct vfio_device *core_vdev)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
> + struct ice_vfio_pci_core_device, core_device.vdev);
> + struct vfio_pci_core_device *vdev = &ice_vdev->core_device;
> + int ret;
> +
> + ret = vfio_pci_core_enable(vdev);
> + if (ret)
> + return ret;
> +
> + ret = ice_vfio_migration_init(ice_vdev);
> + if (ret) {
> + vfio_pci_core_disable(vdev);
> + return ret;
> + }
> + ice_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
> + vfio_pci_core_finish_enable(vdev);
> +
> + return 0;
> +}
> +
> +/**
> + * ice_vfio_pci_close_device - Called when a vfio device fd is closed
> + * @core_vdev: the vfio device to close
> + */
> +static void ice_vfio_pci_close_device(struct vfio_device *core_vdev)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
> + struct ice_vfio_pci_core_device, core_device.vdev);
> +
> + ice_vfio_pci_disable_fds(ice_vdev);
> + vfio_pci_core_close_device(core_vdev);
> + ice_vfio_migration_uninit(ice_vdev);
> +}
> +
> +/**
> + * ice_vfio_pci_release_file - release ice vfio pci migration file
> + * @inode: pointer to inode
> + * @filp: pointer to the file to release
> + *
> + * Return 0 for success, negative for error
> + */
> +static int ice_vfio_pci_release_file(struct inode *inode, struct file *filp)
> +{
> + struct ice_vfio_pci_migration_file *migf = filp->private_data;
> +
> + ice_vfio_pci_disable_fd(migf);
> + mutex_destroy(&migf->lock);
> + kfree(migf);
> + return 0;
> +}
> +
> +/**
> + * ice_vfio_pci_save_read - save migration file data to user space
> + * @filp: pointer to migration file
> + * @buf: pointer to user space buffer
> + * @len: data length to be saved
> + * @pos: should be 0
> + *
> + * Return len of saved data, negative for error
> + */
> +static ssize_t ice_vfio_pci_save_read(struct file *filp, char __user *buf,
> + size_t len, loff_t *pos)
> +{
> + struct ice_vfio_pci_migration_file *migf = filp->private_data;
> + loff_t *off = &filp->f_pos;
> + ssize_t done = 0;
> + int ret;
> +
> + if (pos)
> + return -ESPIPE;
> +
> + mutex_lock(&migf->lock);
> + if (*off > migf->total_length) {
> + done = -EINVAL;
> + goto out_unlock;
> + }
> +
> + if (migf->disabled) {
> + done = -ENODEV;
> + goto out_unlock;
> + }
> +
> + len = min_t(size_t, migf->total_length - *off, len);
> + if (len) {
> + ret = copy_to_user(buf, migf->mig_data + *off, len);
> + if (ret) {
> + done = -EFAULT;
> + goto out_unlock;
> + }
> + *off += len;
> + done = len;
> + }
> +out_unlock:
> + mutex_unlock(&migf->lock);
> + return done;
> +}
> +
> +static const struct file_operations ice_vfio_pci_save_fops = {
> + .owner = THIS_MODULE,
> + .read = ice_vfio_pci_save_read,
> + .release = ice_vfio_pci_release_file,
> + .llseek = no_llseek,
> +};
> +
> +/**
> + * ice_vfio_pci_stop_copy - create migration file and save migration state to it
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + *
> + * Return migration file handler
> + */
> +static struct ice_vfio_pci_migration_file *
> +ice_vfio_pci_stop_copy(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + struct ice_vfio_pci_migration_file *migf;
> + int ret;
> +
> + migf = kzalloc(sizeof(*migf), GFP_KERNEL);
> + if (!migf)
> + return ERR_PTR(-ENOMEM);
> +
> + migf->filp = anon_inode_getfile("ice_vfio_pci_mig",
> + &ice_vfio_pci_save_fops, migf,
> + O_RDONLY);
> + if (IS_ERR(migf->filp)) {
> + int err = PTR_ERR(migf->filp);
> +
> + kfree(migf);
> + return ERR_PTR(err);
> + }
> +
> + stream_open(migf->filp->f_inode, migf->filp);
> + mutex_init(&migf->lock);
> +
> + ret = ice_vfio_pci_save_state(ice_vdev, migf);
> + if (ret) {
> + fput(migf->filp);
> + kfree(migf);
> + return ERR_PTR(ret);
> + }
> +
> + return migf;
> +}
> +
> +/**
> + * ice_vfio_pci_resume_write- copy migration file data from user space
> + * @filp: pointer to migration file
> + * @buf: pointer to user space buffer
> + * @len: data length to be copied
> + * @pos: should be 0
> + *
> + * Return len of saved data, negative for error
> + */
> +static ssize_t
> +ice_vfio_pci_resume_write(struct file *filp, const char __user *buf,
> + size_t len, loff_t *pos)
> +{
> + struct ice_vfio_pci_migration_file *migf = filp->private_data;
> + loff_t *off = &filp->f_pos;
> + loff_t requested_length;
> + ssize_t done = 0;
> + int ret;
> +
> + if (pos)
> + return -ESPIPE;
> +
> + if (*off < 0 ||
> + check_add_overflow((loff_t)len, *off, &requested_length))
> + return -EINVAL;
> +
> + if (requested_length > sizeof(migf->mig_data))
> + return -ENOMEM;
> +
> + mutex_lock(&migf->lock);
> + if (migf->disabled) {
> + done = -ENODEV;
> + goto out_unlock;
> + }
> +
> + ret = copy_from_user(migf->mig_data + *off, buf, len);
> + if (ret) {
> + done = -EFAULT;
> + goto out_unlock;
> + }
> + *off += len;
> + done = len;
> + migf->total_length += len;
> +out_unlock:
> + mutex_unlock(&migf->lock);
> + return done;
> +}
> +
> +static const struct file_operations ice_vfio_pci_resume_fops = {
> + .owner = THIS_MODULE,
> + .write = ice_vfio_pci_resume_write,
> + .release = ice_vfio_pci_release_file,
> + .llseek = no_llseek,
> +};
> +
> +/**
> + * ice_vfio_pci_resume - create resuming migration file
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + *
> + * Return migration file handler, negative value for failure
> + */
> +static struct ice_vfio_pci_migration_file *
> +ice_vfio_pci_resume(struct ice_vfio_pci_core_device *ice_vdev)
> +{
> + struct ice_vfio_pci_migration_file *migf;
> +
> + migf = kzalloc(sizeof(*migf), GFP_KERNEL);
> + if (!migf)
> + return ERR_PTR(-ENOMEM);
> +
> + migf->filp = anon_inode_getfile("ice_vfio_pci_mig",
> + &ice_vfio_pci_resume_fops, migf,
> + O_WRONLY);
> + if (IS_ERR(migf->filp)) {
> + int err = PTR_ERR(migf->filp);
> +
> + kfree(migf);
> + return ERR_PTR(err);
> + }
> +
> + stream_open(migf->filp->f_inode, migf->filp);
> + mutex_init(&migf->lock);
> + return migf;
> +}
> +
> +/**
> + * ice_vfio_pci_step_device_state_locked - process device state change
> + * @ice_vdev: pointer to ice vfio pci core device structure
> + * @new: new device state
> + * @final: final device state
> + *
> + * Return migration file handler or NULL for success, negative value for failure
> + */
> +static struct file *
> +ice_vfio_pci_step_device_state_locked(struct ice_vfio_pci_core_device *ice_vdev,
> + u32 new, u32 final)
> +{
> + u32 cur = ice_vdev->mig_state;
> + int ret;
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING &&
> + new == VFIO_DEVICE_STATE_RUNNING_P2P) {
> + ice_migration_suspend_dev(ice_vdev->pf, ice_vdev->vf_id);
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
> + new == VFIO_DEVICE_STATE_STOP)
> + return NULL;
This looks suspicious, are we actually able to freeze the internal
device state? It should happen here.
* RUNNING_P2P -> STOP
* STOP_COPY -> STOP
* While in STOP the device must stop the operation of the device. The device
* must not generate interrupts, DMA, or any other change to external state.
* It must not change its internal state. When stopped the device and kernel
* migration driver must accept and respond to interaction to support external
* subsystems in the STOP state, for example PCI MSI-X and PCI config space.
* Failure by the user to restrict device access while in STOP must not result
* in error conditions outside the user context (ex. host system faults).
*
* The STOP_COPY arc will terminate a data transfer session.
> +
> + if (cur == VFIO_DEVICE_STATE_STOP &&
> + new == VFIO_DEVICE_STATE_STOP_COPY) {
> + struct ice_vfio_pci_migration_file *migf;
> +
> + migf = ice_vfio_pci_stop_copy(ice_vdev);
> + if (IS_ERR(migf))
> + return ERR_CAST(migf);
> + get_file(migf->filp);
> + ice_vdev->saving_migf = migf;
> + return migf->filp;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_STOP_COPY &&
> + new == VFIO_DEVICE_STATE_STOP) {
> + ice_vfio_pci_disable_fds(ice_vdev);
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_STOP &&
> + new == VFIO_DEVICE_STATE_RESUMING) {
> + struct ice_vfio_pci_migration_file *migf;
> +
> + migf = ice_vfio_pci_resume(ice_vdev);
> + if (IS_ERR(migf))
> + return ERR_CAST(migf);
> + get_file(migf->filp);
> + ice_vdev->resuming_migf = migf;
> + return migf->filp;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP)
> + return NULL;
* RESUMING -> STOP
* Leaving RESUMING terminates a data transfer session and indicates the
* device should complete processing of the data delivered by write(). The
* kernel migration driver should complete the incorporation of data written
* to the data transfer FD into the device internal state and perform
* final validity and consistency checking of the new device state. If the
* user provided data is found to be incomplete, inconsistent, or otherwise
* invalid, the migration driver must fail the SET_STATE ioctl and
* optionally go to the ERROR state as described below.
> +
> + if (cur == VFIO_DEVICE_STATE_STOP &&
> + new == VFIO_DEVICE_STATE_RUNNING_P2P) {
> + ret = ice_vfio_pci_load_state(ice_vdev);
> + if (ret)
> + return ERR_PTR(ret);
> + ice_vfio_pci_disable_fds(ice_vdev);
STOP is not a state that should have active migration fds, RESUMING ->
STOP above is, which is also where we'd expect to see the state loaded.
This again makes it suspicious whether the device actually supports
stopping and resuming internal state changes.
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
> + new == VFIO_DEVICE_STATE_RUNNING)
> + return NULL;
> +
> + /*
> + * vfio_mig_get_next_state() does not use arcs other than the above
> + */
> + WARN_ON(true);
> + return ERR_PTR(-EINVAL);
> +}
> +
> +/**
> + * ice_vfio_pci_set_device_state - Config device state
> + * @vdev: pointer to vfio pci device
> + * @new_state: device state
> + *
> + * Return 0 for success, negative value for failure.
Inaccurate description of return value.
> + */
> +static struct file *
> +ice_vfio_pci_set_device_state(struct vfio_device *vdev,
> + enum vfio_device_mig_state new_state)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev =
> + container_of(vdev,
> + struct ice_vfio_pci_core_device,
> + core_device.vdev);
> + enum vfio_device_mig_state next_state;
> + struct file *res = NULL;
> + int ret;
> +
> + mutex_lock(&ice_vdev->state_mutex);
> + while (new_state != ice_vdev->mig_state) {
> + ret = vfio_mig_get_next_state(vdev, ice_vdev->mig_state,
> + new_state, &next_state);
> + if (ret) {
> + res = ERR_PTR(ret);
> + break;
> + }
> + res = ice_vfio_pci_step_device_state_locked(ice_vdev,
> + next_state,
> + new_state);
> + if (IS_ERR(res))
> + break;
> + ice_vdev->mig_state = next_state;
> + if (WARN_ON(res && new_state != ice_vdev->mig_state)) {
> + fput(res);
> + res = ERR_PTR(-EINVAL);
> + break;
> + }
> + }
> + ice_vfio_pci_state_mutex_unlock(ice_vdev);
> + return res;
> +}
> +
> +/**
> + * ice_vfio_pci_get_device_state - get device state
> + * @vdev: pointer to vfio pci device
> + * @curr_state: device state
> + *
> + * Return 0 for success
> + */
> +static int ice_vfio_pci_get_device_state(struct vfio_device *vdev,
> + enum vfio_device_mig_state *curr_state)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev =
> + container_of(vdev,
> + struct ice_vfio_pci_core_device,
> + core_device.vdev);
Blank line after variable declaration.
> + mutex_lock(&ice_vdev->state_mutex);
> + *curr_state = ice_vdev->mig_state;
> + ice_vfio_pci_state_mutex_unlock(ice_vdev);
> + return 0;
> +}
> +
> +/**
> + * ice_vfio_pci_get_data_size - get migration data size
> + * @vdev: pointer to vfio pci device
> + * @stop_copy_length: migration data size
> + *
> + * Return 0 for success
> + */
> +static int
> +ice_vfio_pci_get_data_size(struct vfio_device *vdev,
> + unsigned long *stop_copy_length)
> +{
> + *stop_copy_length = SZ_128K;
> + return 0;
> +}
> +
> +static const struct vfio_migration_ops ice_vfio_pci_migrn_state_ops = {
> + .migration_set_state = ice_vfio_pci_set_device_state,
> + .migration_get_state = ice_vfio_pci_get_device_state,
> + .migration_get_data_size = ice_vfio_pci_get_data_size,
> +};
> +
> +/**
> + * ice_vfio_pci_core_init_dev - initialize vfio device
> + * @core_vdev: pointer to vfio device
> + *
> + * Return 0 for success
> + */
> +static int ice_vfio_pci_core_init_dev(struct vfio_device *core_vdev)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
> + struct ice_vfio_pci_core_device, core_device.vdev);
> +
> + mutex_init(&ice_vdev->state_mutex);
> + spin_lock_init(&ice_vdev->reset_lock);
> +
> + core_vdev->migration_flags =
> + VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
> + core_vdev->mig_ops = &ice_vfio_pci_migrn_state_ops;
> +
> + return vfio_pci_core_init_dev(core_vdev);
> +}
> +
> +static const struct vfio_device_ops ice_vfio_pci_ops = {
> + .name = "ice-vfio-pci",
> + .init = ice_vfio_pci_core_init_dev,
> + .release = vfio_pci_core_release_dev,
Looks like the release callback should at least cleanup the locks for
lockdep rather than use the core function directly.
> + .open_device = ice_vfio_pci_open_device,
> + .close_device = ice_vfio_pci_close_device,
> + .device_feature = vfio_pci_core_ioctl_feature,
> + .read = vfio_pci_core_read,
> + .write = vfio_pci_core_write,
> + .ioctl = vfio_pci_core_ioctl,
> + .mmap = vfio_pci_core_mmap,
> + .request = vfio_pci_core_request,
> + .match = vfio_pci_core_match,
> + .bind_iommufd = vfio_iommufd_physical_bind,
> + .unbind_iommufd = vfio_iommufd_physical_unbind,
> + .attach_ioas = vfio_iommufd_physical_attach_ioas,
> + .detach_ioas = vfio_iommufd_physical_detach_ioas,
> +};
> +
> +/**
> + * ice_vfio_pci_probe - Device initialization routine
> + * @pdev: PCI device information struct
> + * @id: entry in ice_vfio_pci_table
> + *
> + * Returns 0 on success, negative on failure
> + */
> +static int
> +ice_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev;
> + int ret;
> +
> + ice_vdev = vfio_alloc_device(ice_vfio_pci_core_device, core_device.vdev,
> + &pdev->dev, &ice_vfio_pci_ops);
> + if (!ice_vdev)
Needs to test IS_ERR(ice_vdev). Thanks,
Alex
> + return -ENOMEM;
> +
> + dev_set_drvdata(&pdev->dev, &ice_vdev->core_device);
> +
> + ret = vfio_pci_core_register_device(&ice_vdev->core_device);
> + if (ret)
> + goto out_free;
> +
> + return 0;
> +
> +out_free:
> + vfio_put_device(&ice_vdev->core_device.vdev);
> + return ret;
> +}
> +
> +/**
> + * ice_vfio_pci_remove - Device removal routine
> + * @pdev: PCI device information struct
> + */
> +static void ice_vfio_pci_remove(struct pci_dev *pdev)
> +{
> + struct ice_vfio_pci_core_device *ice_vdev =
> + (struct ice_vfio_pci_core_device *)dev_get_drvdata(&pdev->dev);
> +
> + vfio_pci_core_unregister_device(&ice_vdev->core_device);
> + vfio_put_device(&ice_vdev->core_device.vdev);
> +}
> +
> +/* ice_pci_tbl - PCI Device ID Table
> + *
> + * Wildcard entries (PCI_ANY_ID) should come last
> + * Last entry must be all 0s
> + *
> + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
> + * Class, Class Mask, private data (not used) }
> + */
> +static const struct pci_device_id ice_vfio_pci_table[] = {
> + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x1889) },
> + {}
> +};
> +MODULE_DEVICE_TABLE(pci, ice_vfio_pci_table);
> +
> +static const struct pci_error_handlers ice_vfio_pci_core_err_handlers = {
> + .reset_done = ice_vfio_pci_reset_done,
> + .error_detected = vfio_pci_core_aer_err_detected,
> +};
> +
> +static struct pci_driver ice_vfio_pci_driver = {
> + .name = "ice-vfio-pci",
> + .id_table = ice_vfio_pci_table,
> + .probe = ice_vfio_pci_probe,
> + .remove = ice_vfio_pci_remove,
> + .err_handler = &ice_vfio_pci_core_err_handlers,
> + .driver_managed_dma = true,
> +};
> +
> +module_pci_driver(ice_vfio_pci_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Intel Corporation, <linux.nics@...el.com>");
> +MODULE_DESCRIPTION(DRIVER_DESC);
Powered by blists - more mailing lists