[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230410160538.35c1a5a6.alex.williamson@redhat.com>
Date: Mon, 10 Apr 2023 16:05:37 -0600
From: Alex Williamson <alex.williamson@...hat.com>
To: Brett Creeley <brett.creeley@....com>
Cc: <kvm@...r.kernel.org>, <netdev@...r.kernel.org>, <jgg@...dia.com>,
<yishaih@...dia.com>, <shameerali.kolothum.thodi@...wei.com>,
<kevin.tian@...el.com>, <shannon.nelson@....com>,
<drivers@...sando.io>, <simon.horman@...igine.com>
Subject: Re: [PATCH v8 vfio 4/7] vfio/pds: Add VFIO live migration support
On Tue, 4 Apr 2023 12:01:38 -0700
Brett Creeley <brett.creeley@....com> wrote:
> diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c
> new file mode 100644
> index 000000000000..855f5da9b99a
> --- /dev/null
> +++ b/drivers/vfio/pci/pds/lm.c
> @@ -0,0 +1,479 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
> +
> +#include <linux/anon_inodes.h>
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/highmem.h>
> +#include <linux/vfio.h>
> +#include <linux/vfio_pci_core.h>
> +
> +#include "vfio_dev.h"
> +#include "cmds.h"
> +
> +#define PDS_VFIO_LM_FILENAME "pds_vfio_lm"
> +
> +const char *
> +pds_vfio_lm_state(enum vfio_device_mig_state state)
> +{
> + switch (state) {
> + case VFIO_DEVICE_STATE_ERROR:
> + return "VFIO_DEVICE_STATE_ERROR";
> + case VFIO_DEVICE_STATE_STOP:
> + return "VFIO_DEVICE_STATE_STOP";
> + case VFIO_DEVICE_STATE_RUNNING:
> + return "VFIO_DEVICE_STATE_RUNNING";
> + case VFIO_DEVICE_STATE_STOP_COPY:
> + return "VFIO_DEVICE_STATE_STOP_COPY";
> + case VFIO_DEVICE_STATE_RESUMING:
> + return "VFIO_DEVICE_STATE_RESUMING";
> + case VFIO_DEVICE_STATE_RUNNING_P2P:
> + return "VFIO_DEVICE_STATE_RUNNING_P2P";
> + default:
> + return "VFIO_DEVICE_STATE_INVALID";
> + }
> +
> + return "VFIO_DEVICE_STATE_INVALID";
Seems a tad redundant.
> +}
> +
[snip]
> +struct file *
> +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio,
> + enum vfio_device_mig_state next)
> +{
> + enum vfio_device_mig_state cur = pds_vfio->state;
> + struct device *dev = &pds_vfio->pdev->dev;
> + int err = 0;
> +
> + dev_dbg(dev, "%s => %s\n",
> + pds_vfio_lm_state(cur), pds_vfio_lm_state(next));
> +
> + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_STOP_COPY) {
> + /* Device is already stopped
> + * create save device data file & get device state from firmware
> + */
Standard multi-line comment style please, we're not under net/ here.
> + err = pds_vfio_get_save_file(pds_vfio);
> + if (err)
> + return ERR_PTR(err);
> +
> + /* Get device state */
> + err = pds_vfio_get_lm_state_cmd(pds_vfio);
> + if (err) {
> + pds_vfio_put_save_file(pds_vfio);
> + return ERR_PTR(err);
> + }
> +
> + return pds_vfio->save_file->filep;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_STOP_COPY && next == VFIO_DEVICE_STATE_STOP) {
> + /* Device is already stopped
> + * delete the save device state file
> + */
> + pds_vfio_put_save_file(pds_vfio);
> + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio,
> + PDS_LM_STA_NONE);
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) {
> + /* create resume device data file */
> + err = pds_vfio_get_restore_file(pds_vfio);
> + if (err)
> + return ERR_PTR(err);
> +
> + return pds_vfio->restore_file->filep;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) {
> + /* Set device state */
> + err = pds_vfio_set_lm_state_cmd(pds_vfio);
> + if (err)
> + return ERR_PTR(err);
> +
> + /* delete resume device data file */
> + pds_vfio_put_restore_file(pds_vfio);
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) {
> + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
> + /* Device should be stopped
> + * no interrupts, dma or change in internal state
> + */
> + err = pds_vfio_suspend_device_cmd(pds_vfio);
> + if (err)
> + return ERR_PTR(err);
> +
> + return NULL;
> + }
The comment here, as well as the no-op transitions from STOP->P2P and
P2P->STOP has me concerned whether a device in this suspend state
really meets the definition of our P2P state. The RUNNING_P2P state is
a quiescent state where the device must accept access, including
peer-to-peer DMA, but it cannot initiate DMA or interrupts. Is that
consistent with this suspend state? Thanks,
Alex
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_RUNNING) {
> + /* Device should be functional
> + * interrupts, dma, mmio or changes to internal state is allowed
> + */
> + err = pds_vfio_resume_device_cmd(pds_vfio);
> + if (err)
> + return ERR_PTR(err);
> +
> + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio,
> + PDS_LM_STA_NONE);
> + return NULL;
> + }
> +
> + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P)
> + return NULL;
> +
> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP)
> + return NULL;
> +
> + return ERR_PTR(-EINVAL);
> +}
Powered by blists - more mailing lists