[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6c05c616-502e-fa07-b8e8-6f733b1c2e2e@amd.com>
Date: Wed, 15 Mar 2023 22:48:52 +0530
From: Gautam Dawar <gdawar@....com>
To: Jason Wang <jasowang@...hat.com>,
Gautam Dawar <gautam.dawar@....com>
Cc: linux-net-drivers@....com, Edward Cree <ecree.xilinx@...il.com>,
Martin Habets <habetsm.xilinx@...il.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Richard Cochran <richardcochran@...il.com>,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
eperezma@...hat.com, harpreet.anand@....com, tanuj.kamde@....com,
koushik.dutta@....com
Subject: Re: [PATCH net-next v2 09/14] sfc: implement device status related
vdpa config operations
On 3/15/23 10:30, Jason Wang wrote:
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> 在 2023/3/13 20:10, Gautam Dawar 写道:
>>
>> On 3/10/23 10:35, Jason Wang wrote:
>>> Caution: This message originated from an External Source. Use proper
>>> caution when opening attachments, clicking links, or responding.
>>>
>>>
>>> On Tue, Mar 7, 2023 at 7:38 PM Gautam Dawar <gautam.dawar@....com>
>>> wrote:
>>>> vDPA config opertions to handle get/set device status and device
>>>> reset have been implemented. Also .suspend config operation is
>>>> implemented to support Live Migration.
>>>>
>>>> Signed-off-by: Gautam Dawar <gautam.dawar@....com>
>>>> ---
>>>> drivers/net/ethernet/sfc/ef100_vdpa.c | 16 +-
>>>> drivers/net/ethernet/sfc/ef100_vdpa.h | 2 +
>>>> drivers/net/ethernet/sfc/ef100_vdpa_ops.c | 367
>>>> ++++++++++++++++++++--
>>>> 3 files changed, 355 insertions(+), 30 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.c
>>>> b/drivers/net/ethernet/sfc/ef100_vdpa.c
>>>> index c66e5aef69ea..4ba57827a6cd 100644
>>>> --- a/drivers/net/ethernet/sfc/ef100_vdpa.c
>>>> +++ b/drivers/net/ethernet/sfc/ef100_vdpa.c
>>>> @@ -68,9 +68,14 @@ static int vdpa_allocate_vis(struct efx_nic *efx,
>>>> unsigned int *allocated_vis)
>>>>
>>>> static void ef100_vdpa_delete(struct efx_nic *efx)
>>>> {
>>>> + struct vdpa_device *vdpa_dev;
>>>> +
>>>> if (efx->vdpa_nic) {
>>>> + vdpa_dev = &efx->vdpa_nic->vdpa_dev;
>>>> + ef100_vdpa_reset(vdpa_dev);
>>>> +
>>>> /* replace with _vdpa_unregister_device later */
>>>> - put_device(&efx->vdpa_nic->vdpa_dev.dev);
>>>> + put_device(&vdpa_dev->dev);
>>>> }
>>>> efx_mcdi_free_vis(efx);
>>>> }
>>>> @@ -171,6 +176,15 @@ static struct ef100_vdpa_nic
>>>> *ef100_vdpa_create(struct efx_nic *efx,
>>>> }
>>>> }
>>>>
>>>> + rc = devm_add_action_or_reset(&efx->pci_dev->dev,
>>>> + ef100_vdpa_irq_vectors_free,
>>>> + efx->pci_dev);
>>>> + if (rc) {
>>>> + pci_err(efx->pci_dev,
>>>> + "Failed adding devres for freeing irq
>>>> vectors\n");
>>>> + goto err_put_device;
>>>> + }
>>>> +
>>>> rc = get_net_config(vdpa_nic);
>>>> if (rc)
>>>> goto err_put_device;
>>>> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.h
>>>> b/drivers/net/ethernet/sfc/ef100_vdpa.h
>>>> index 348ca8a7404b..58791402e454 100644
>>>> --- a/drivers/net/ethernet/sfc/ef100_vdpa.h
>>>> +++ b/drivers/net/ethernet/sfc/ef100_vdpa.h
>>>> @@ -149,6 +149,8 @@ int ef100_vdpa_register_mgmtdev(struct efx_nic
>>>> *efx);
>>>> void ef100_vdpa_unregister_mgmtdev(struct efx_nic *efx);
>>>> void ef100_vdpa_irq_vectors_free(void *data);
>>>> int ef100_vdpa_init_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx);
>>>> +void ef100_vdpa_irq_vectors_free(void *data);
>>>> +int ef100_vdpa_reset(struct vdpa_device *vdev);
>>>>
>>>> static inline bool efx_vdpa_is_little_endian(struct ef100_vdpa_nic
>>>> *vdpa_nic)
>>>> {
>>>> diff --git a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
>>>> b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
>>>> index 0051c4c0e47c..95a2177f85a2 100644
>>>> --- a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
>>>> +++ b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
>>>> @@ -22,11 +22,6 @@ static struct ef100_vdpa_nic *get_vdpa_nic(struct
>>>> vdpa_device *vdev)
>>>> return container_of(vdev, struct ef100_vdpa_nic, vdpa_dev);
>>>> }
>>>>
>>>> -void ef100_vdpa_irq_vectors_free(void *data)
>>>> -{
>>>> - pci_free_irq_vectors(data);
>>>> -}
>>>> -
>>>> static int create_vring_ctx(struct ef100_vdpa_nic *vdpa_nic, u16
>>>> idx)
>>>> {
>>>> struct efx_vring_ctx *vring_ctx;
>>>> @@ -52,14 +47,6 @@ static void delete_vring_ctx(struct
>>>> ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> vdpa_nic->vring[idx].vring_ctx = NULL;
>>>> }
>>>>
>>>> -static void reset_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> -{
>>>> - vdpa_nic->vring[idx].vring_type = EF100_VDPA_VQ_NTYPES;
>>>> - vdpa_nic->vring[idx].vring_state = 0;
>>>> - vdpa_nic->vring[idx].last_avail_idx = 0;
>>>> - vdpa_nic->vring[idx].last_used_idx = 0;
>>>> -}
>>>> -
>>>> int ef100_vdpa_init_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> {
>>>> u32 offset;
>>>> @@ -103,6 +90,236 @@ static bool is_qid_invalid(struct
>>>> ef100_vdpa_nic *vdpa_nic, u16 idx,
>>>> return false;
>>>> }
>>>>
>>>> +static void irq_vring_fini(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> +{
>>>> + struct ef100_vdpa_vring_info *vring = &vdpa_nic->vring[idx];
>>>> + struct pci_dev *pci_dev = vdpa_nic->efx->pci_dev;
>>>> +
>>>> + devm_free_irq(&pci_dev->dev, vring->irq, vring);
>>>> + vring->irq = -EINVAL;
>>>> +}
>>>> +
>>>> +static irqreturn_t vring_intr_handler(int irq, void *arg)
>>>> +{
>>>> + struct ef100_vdpa_vring_info *vring = arg;
>>>> +
>>>> + if (vring->cb.callback)
>>>> + return vring->cb.callback(vring->cb.private);
>>>> +
>>>> + return IRQ_NONE;
>>>> +}
>>>> +
>>>> +static int ef100_vdpa_irq_vectors_alloc(struct pci_dev *pci_dev,
>>>> u16 nvqs)
>>>> +{
>>>> + int rc;
>>>> +
>>>> + rc = pci_alloc_irq_vectors(pci_dev, nvqs, nvqs, PCI_IRQ_MSIX);
>>>> + if (rc < 0)
>>>> + pci_err(pci_dev,
>>>> + "Failed to alloc %d IRQ vectors, err:%d\n",
>>>> nvqs, rc);
>>>> + return rc;
>>>> +}
>>>> +
>>>> +void ef100_vdpa_irq_vectors_free(void *data)
>>>> +{
>>>> + pci_free_irq_vectors(data);
>>>> +}
>>>> +
>>>> +static int irq_vring_init(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> +{
>>>> + struct ef100_vdpa_vring_info *vring = &vdpa_nic->vring[idx];
>>>> + struct pci_dev *pci_dev = vdpa_nic->efx->pci_dev;
>>>> + int irq;
>>>> + int rc;
>>>> +
>>>> + snprintf(vring->msix_name, 256, "x_vdpa[%s]-%d\n",
>>>> + pci_name(pci_dev), idx);
>>>> + irq = pci_irq_vector(pci_dev, idx);
>>>> + rc = devm_request_irq(&pci_dev->dev, irq,
>>>> vring_intr_handler, 0,
>>>> + vring->msix_name, vring);
>>>> + if (rc)
>>>> + pci_err(pci_dev,
>>>> + "devm_request_irq failed for vring %d, rc
>>>> %d\n",
>>>> + idx, rc);
>>>> + else
>>>> + vring->irq = irq;
>>>> +
>>>> + return rc;
>>>> +}
>>>> +
>>>> +static int delete_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> +{
>>>> + struct efx_vring_dyn_cfg vring_dyn_cfg;
>>>> + int rc;
>>>> +
>>>> + if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
>>>> + return 0;
>>>> +
>>>> + rc = efx_vdpa_vring_destroy(vdpa_nic->vring[idx].vring_ctx,
>>>> + &vring_dyn_cfg);
>>>> + if (rc)
>>>> + dev_err(&vdpa_nic->vdpa_dev.dev,
>>>> + "%s: delete vring failed index:%u, err:%d\n",
>>>> + __func__, idx, rc);
>>>> + vdpa_nic->vring[idx].last_avail_idx = vring_dyn_cfg.avail_idx;
>>>> + vdpa_nic->vring[idx].last_used_idx = vring_dyn_cfg.used_idx;
>>>> + vdpa_nic->vring[idx].vring_state &= ~EF100_VRING_CREATED;
>>>> +
>>>> + irq_vring_fini(vdpa_nic, idx);
>>>> +
>>>> + return rc;
>>>> +}
>>>> +
>>>> +static void ef100_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
>>>> +{
>>>> + struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> + u32 idx_val;
>>>> +
>>>> + if (is_qid_invalid(vdpa_nic, idx, __func__))
>>>> + return;
>>>> +
>>>> + if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
>>>> + return;
>>>> +
>>>> + idx_val = idx;
>>>> + _efx_writed(vdpa_nic->efx, cpu_to_le32(idx_val),
>>>> + vdpa_nic->vring[idx].doorbell_offset);
>>>> +}
>>>> +
>>>> +static bool can_create_vring(struct ef100_vdpa_nic *vdpa_nic, u16
>>>> idx)
>>>> +{
>>>> + if (vdpa_nic->vring[idx].vring_state ==
>>>> EF100_VRING_CONFIGURED &&
>>>> + vdpa_nic->status & VIRTIO_CONFIG_S_DRIVER_OK &&
>>>> + !(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
>>>> + return true;
>>>> +
>>>> + return false;
>>>> +}
>>>> +
>>>> +static int create_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> +{
>>>> + struct efx_vring_dyn_cfg vring_dyn_cfg;
>>>> + struct efx_vring_cfg vring_cfg;
>>>> + int rc;
>>>> +
>>>> + rc = irq_vring_init(vdpa_nic, idx);
>>>> + if (rc) {
>>>> + dev_err(&vdpa_nic->vdpa_dev.dev,
>>>> + "%s: irq_vring_init failed. index:%u,
>>>> err:%d\n",
>>>> + __func__, idx, rc);
>>>> + return rc;
>>>> + }
>>>> + vring_cfg.desc = vdpa_nic->vring[idx].desc;
>>>> + vring_cfg.avail = vdpa_nic->vring[idx].avail;
>>>> + vring_cfg.used = vdpa_nic->vring[idx].used;
>>>> + vring_cfg.size = vdpa_nic->vring[idx].size;
>>>> + vring_cfg.features = vdpa_nic->features;
>>>> + vring_cfg.msix_vector = idx;
>>>> + vring_dyn_cfg.avail_idx = vdpa_nic->vring[idx].last_avail_idx;
>>>> + vring_dyn_cfg.used_idx = vdpa_nic->vring[idx].last_used_idx;
>>>> +
>>>> + rc = efx_vdpa_vring_create(vdpa_nic->vring[idx].vring_ctx,
>>>> + &vring_cfg, &vring_dyn_cfg);
>>>> + if (rc) {
>>>> + dev_err(&vdpa_nic->vdpa_dev.dev,
>>>> + "%s: vring_create failed index:%u, err:%d\n",
>>>> + __func__, idx, rc);
>>>> + goto err_vring_create;
>>>> + }
>>>> + vdpa_nic->vring[idx].vring_state |= EF100_VRING_CREATED;
>>>> +
>>>> + /* A VQ kick allows the device to read the avail_idx, which
>>>> will be
>>>> + * required at the destination after live migration.
>>>> + */
>>>> + ef100_vdpa_kick_vq(&vdpa_nic->vdpa_dev, idx);
>>>> +
>>>> + return 0;
>>>> +
>>>> +err_vring_create:
>>>> + irq_vring_fini(vdpa_nic, idx);
>>>> + return rc;
>>>> +}
>>>> +
>>>> +static void reset_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
>>>> +{
>>>> + delete_vring(vdpa_nic, idx);
>>>> + vdpa_nic->vring[idx].vring_type = EF100_VDPA_VQ_NTYPES;
>>>> + vdpa_nic->vring[idx].vring_state = 0;
>>>> + vdpa_nic->vring[idx].last_avail_idx = 0;
>>>> + vdpa_nic->vring[idx].last_used_idx = 0;
>>>> +}
>>>> +
>>>> +static void ef100_reset_vdpa_device(struct ef100_vdpa_nic *vdpa_nic)
>>>> +{
>>>> + int i;
>>>> +
>>>> + WARN_ON(!mutex_is_locked(&vdpa_nic->lock));
>>>> +
>>>> + if (!vdpa_nic->status)
>>>> + return;
>>>> +
>>>> + vdpa_nic->vdpa_state = EF100_VDPA_STATE_INITIALIZED;
>>>> + vdpa_nic->status = 0;
>>>> + vdpa_nic->features = 0;
>>>> + for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++)
>>>> + reset_vring(vdpa_nic, i);
>>>> + ef100_vdpa_irq_vectors_free(vdpa_nic->efx->pci_dev);
>>>> +}
>>>> +
>>>> +/* May be called under the rtnl lock */
>>>> +int ef100_vdpa_reset(struct vdpa_device *vdev)
>>>> +{
>>>> + struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> +
>>>> + /* vdpa device can be deleted anytime but the bar_config
>>>> + * could still be vdpa and hence efx->state would be
>>>> STATE_VDPA.
>>>> + * Accordingly, ensure vdpa device exists before reset
>>>> handling
>>>> + */
>>>> + if (!vdpa_nic)
>>>> + return -ENODEV;
>>>> +
>>>> + mutex_lock(&vdpa_nic->lock);
>>>> + ef100_reset_vdpa_device(vdpa_nic);
>>>> + mutex_unlock(&vdpa_nic->lock);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int start_vdpa_device(struct ef100_vdpa_nic *vdpa_nic)
>>>> +{
>>>> + struct efx_nic *efx = vdpa_nic->efx;
>>>> + struct ef100_nic_data *nic_data;
>>>> + int i, j;
>>>> + int rc;
>>>> +
>>>> + nic_data = efx->nic_data;
>>>> + rc = ef100_vdpa_irq_vectors_alloc(efx->pci_dev,
>>>> + vdpa_nic->max_queue_pairs * 2);
>>>> + if (rc < 0) {
>>>> + pci_err(efx->pci_dev,
>>>> + "vDPA IRQ alloc failed for vf: %u err:%d\n",
>>>> + nic_data->vf_index, rc);
>>>> + return rc;
>>>> + }
>>>> +
>>>> + for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++) {
>>>> + if (can_create_vring(vdpa_nic, i)) {
>>>> + rc = create_vring(vdpa_nic, i);
>>>> + if (rc)
>>>> + goto clear_vring;
>>>> + }
>>>> + }
>>>> +
>>>> + vdpa_nic->vdpa_state = EF100_VDPA_STATE_STARTED;
>>> It looks to me that this duplicates with the DRIVER_OK status bit.
>> vdpa_state is set EF100_VDPA_STATE_STARTED during DRIVER_OK handling.
>> See my later response for its purpose.
>>>
>>>> + return 0;
>>>> +
>>>> +clear_vring:
>>>> + for (j = 0; j < i; j++)
>>>> + delete_vring(vdpa_nic, j);
>>>> +
>>>> + ef100_vdpa_irq_vectors_free(efx->pci_dev);
>>>> + return rc;
>>>> +}
>>>> +
>>>> static int ef100_vdpa_set_vq_address(struct vdpa_device *vdev,
>>>> u16 idx, u64 desc_area, u64
>>>> driver_area,
>>>> u64 device_area)
>>>> @@ -144,22 +361,6 @@ static void ef100_vdpa_set_vq_num(struct
>>>> vdpa_device *vdev, u16 idx, u32 num)
>>>> mutex_unlock(&vdpa_nic->lock);
>>>> }
>>>>
>>>> -static void ef100_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
>>>> -{
>>>> - struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> - u32 idx_val;
>>>> -
>>>> - if (is_qid_invalid(vdpa_nic, idx, __func__))
>>>> - return;
>>>> -
>>>> - if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
>>>> - return;
>>>> -
>>>> - idx_val = idx;
>>>> - _efx_writed(vdpa_nic->efx, cpu_to_le32(idx_val),
>>>> - vdpa_nic->vring[idx].doorbell_offset);
>>>> -}
>>>> -
>>>> static void ef100_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx,
>>>> struct vdpa_callback *cb)
>>>> {
>>>> @@ -176,6 +377,7 @@ static void ef100_vdpa_set_vq_ready(struct
>>>> vdpa_device *vdev, u16 idx,
>>>> bool ready)
>>>> {
>>>> struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> + int rc;
>>>>
>>>> if (is_qid_invalid(vdpa_nic, idx, __func__))
>>>> return;
>>>> @@ -184,9 +386,21 @@ static void ef100_vdpa_set_vq_ready(struct
>>>> vdpa_device *vdev, u16 idx,
>>>> if (ready) {
>>>> vdpa_nic->vring[idx].vring_state |=
>>>> EF100_VRING_READY_CONFIGURED;
>>>> + if (vdpa_nic->vdpa_state ==
>>>> EF100_VDPA_STATE_STARTED &&
>>>> + can_create_vring(vdpa_nic, idx)) {
>>>> + rc = create_vring(vdpa_nic, idx);
>>>> + if (rc)
>>>> + /* Rollback ready configuration
>>>> + * So that the above layer driver
>>>> + * can make another attempt to set
>>>> ready
>>>> + */
>>>> + vdpa_nic->vring[idx].vring_state &=
>>>> + ~EF100_VRING_READY_CONFIGURED;
>>>> + }
>>>> } else {
>>>> vdpa_nic->vring[idx].vring_state &=
>>>> ~EF100_VRING_READY_CONFIGURED;
>>>> + delete_vring(vdpa_nic, idx);
>>>> }
>>>> mutex_unlock(&vdpa_nic->lock);
>>>> }
>>>> @@ -296,6 +510,12 @@ static u64
>>>> ef100_vdpa_get_device_features(struct vdpa_device *vdev)
>>>> }
>>>>
>>>> features |= BIT_ULL(VIRTIO_NET_F_MAC);
>>>> + /* As QEMU SVQ doesn't implement the following features,
>>>> + * masking them off to allow Live Migration
>>>> + */
>>>> + features &= ~BIT_ULL(VIRTIO_F_IN_ORDER);
>>>> + features &= ~BIT_ULL(VIRTIO_F_ORDER_PLATFORM);
>>> It's better not to work around userspace bugs in the kernel. We should
>>> fix Qemu instead.
>>
>> There's already a QEMU patch [1] submitted to support
>> VIRTIO_F_ORDER_PLATFORM but it hasn't concluded yet. Also, there is no
>> support for VIRTIO_F_IN_ORDER in the kernel virtio driver. The motive
>> of this change is to have VM Live Migration working with the kernel
>> in-tree driver without requiring any changes.
>>
>> Once QEMU is able to handle these features, we can submit a patch to
>> undo these changes.
>
>
> I can understand the motivation, but it works for prototyping but not
> formal kernel code (especially consider SVQ is not mature and still
> being development). What's more, we can not assume Qemu is the only
> user, we have other users like DPDK and cloud-hypervisors.
Ok, if the expectation is to have the user deal with the issues and make
required changes on the in-tree driver to make it work, I'll remove this
part.
>
> Thanks
>
>
>>
>>>
>>>> +
>>>> return features;
>>>> }
>>>>
>>>> @@ -356,6 +576,77 @@ static u32 ef100_vdpa_get_vendor_id(struct
>>>> vdpa_device *vdev)
>>>> return EF100_VDPA_VENDOR_ID;
>>>> }
>>>>
>>>> +static u8 ef100_vdpa_get_status(struct vdpa_device *vdev)
>>>> +{
>>>> + struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> + u8 status;
>>>> +
>>>> + mutex_lock(&vdpa_nic->lock);
>>>> + status = vdpa_nic->status;
>>>> + mutex_unlock(&vdpa_nic->lock);
>>>> + return status;
>>>> +}
>>>> +
>>>> +static void ef100_vdpa_set_status(struct vdpa_device *vdev, u8
>>>> status)
>>>> +{
>>>> + struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> + u8 new_status;
>>>> + int rc;
>>>> +
>>>> + mutex_lock(&vdpa_nic->lock);
>>>> + if (!status) {
>>>> + dev_info(&vdev->dev,
>>>> + "%s: Status received is 0. Device reset
>>>> being done\n",
>>>> + __func__);
>>> This is trigger-able by the userspace. It might be better to use
>>> dev_dbg() instead.
>> Will change.
>>>
>>>> + ef100_reset_vdpa_device(vdpa_nic);
>>>> + goto unlock_return;
>>>> + }
>>>> + new_status = status & ~vdpa_nic->status;
>>>> + if (new_status == 0) {
>>>> + dev_info(&vdev->dev,
>>>> + "%s: New status same as current status\n",
>>>> __func__);
>>> Same here.
>> Ok.
>>>
>>>> + goto unlock_return;
>>>> + }
>>>> + if (new_status & VIRTIO_CONFIG_S_FAILED) {
>>>> + ef100_reset_vdpa_device(vdpa_nic);
>>>> + goto unlock_return;
>>>> + }
>>>> +
>>>> + if (new_status & VIRTIO_CONFIG_S_ACKNOWLEDGE) {
>>>> + vdpa_nic->status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
>>>> + new_status &= ~VIRTIO_CONFIG_S_ACKNOWLEDGE;
>>>> + }
>>>> + if (new_status & VIRTIO_CONFIG_S_DRIVER) {
>>>> + vdpa_nic->status |= VIRTIO_CONFIG_S_DRIVER;
>>>> + new_status &= ~VIRTIO_CONFIG_S_DRIVER;
>>>> + }
>>>> + if (new_status & VIRTIO_CONFIG_S_FEATURES_OK) {
>>>> + vdpa_nic->status |= VIRTIO_CONFIG_S_FEATURES_OK;
>>>> + vdpa_nic->vdpa_state = EF100_VDPA_STATE_NEGOTIATED;
>>> It might be better to explain the reason we need to track another
>>> state in vdpa_state instead of simply using the device status.
>> vdpa_state helps to ensure correct status transitions in the
>> .set_status callback and safe-guards against incorrect/malicious
>> user-space driver.
>
>
> Ok, let's document this in the definition of vdpa_state.
Sure, will do.
>
>
>>>
>>>> + new_status &= ~VIRTIO_CONFIG_S_FEATURES_OK;
>>>> + }
>>>> + if (new_status & VIRTIO_CONFIG_S_DRIVER_OK &&
>>>> + vdpa_nic->vdpa_state == EF100_VDPA_STATE_NEGOTIATED) {
>>>> + vdpa_nic->status |= VIRTIO_CONFIG_S_DRIVER_OK;
>>>> + rc = start_vdpa_device(vdpa_nic);
>>>> + if (rc) {
>>>> + dev_err(&vdpa_nic->vdpa_dev.dev,
>>>> + "%s: vDPA device failed:%d\n",
>>>> __func__, rc);
>>>> + vdpa_nic->status &=
>>>> ~VIRTIO_CONFIG_S_DRIVER_OK;
>>>> + goto unlock_return;
>>>> + }
>>>> + new_status &= ~VIRTIO_CONFIG_S_DRIVER_OK;
>>>> + }
>>>> + if (new_status) {
>>>> + dev_warn(&vdev->dev,
>>>> + "%s: Mismatch Status: %x & State: %u\n",
>>>> + __func__, new_status, vdpa_nic->vdpa_state);
>>>> + }
>>>> +
>>>> +unlock_return:
>>>> + mutex_unlock(&vdpa_nic->lock);
>>>> +}
>>>> +
>>>> static size_t ef100_vdpa_get_config_size(struct vdpa_device *vdev)
>>>> {
>>>> return sizeof(struct virtio_net_config);
>>>> @@ -393,6 +684,20 @@ static void ef100_vdpa_set_config(struct
>>>> vdpa_device *vdev, unsigned int offset,
>>>> vdpa_nic->mac_configured = true;
>>>> }
>>>>
>>>> +static int ef100_vdpa_suspend(struct vdpa_device *vdev)
>>>> +{
>>>> + struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> + int i, rc;
>>>> +
>>>> + mutex_lock(&vdpa_nic->lock);
>>>> + for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++) {
>>>> + rc = delete_vring(vdpa_nic, i);
>>> Note that the suspension matters for the whole device. It means the
>>> config space should not be changed. But the code here only suspends
>>> the vring, is this intende/d?
>> Are you referring to the possibility of updating device configuration
>> (eg. MAC address) using .set_config() after suspend operation? Is
>> there any other user triggered operation that falls in this category?
>
>
> Updating MAC should be prohibited, one typical use case is the link
> status.
I think this can be dealt with an additional SUSPEND state which can be
used to avoid any changes to the config space.
>
>
>>>
>>> Reset may have the same issue.
>> Could you pls elaborate on the requirement during device reset?
>
>
> I meant ef100_reset_vdpa_device() may suffer from the same issue:
>
> It only reset all the vring but not the config space?
Ok, I think resetting config space would basically be clearing the
memory for vdpa_nic->net_config which is the initial state after vdpa
device allocation.
Gautam
>
> Thanks
>
>
>>>
>>> Thanks
>> [1]
>> https://patchew.org/QEMU/20230213191929.1547497-1-eperezma@redhat.com/
>>>
>>>> + if (rc)
>>>> + break;
>>>> + }
>>>> + mutex_unlock(&vdpa_nic->lock);
>>>> + return rc;
>>>> +}
>>>> static void ef100_vdpa_free(struct vdpa_device *vdev)
>>>> {
>>>> struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
>>>> @@ -428,9 +733,13 @@ const struct vdpa_config_ops
>>>> ef100_vdpa_config_ops = {
>>>> .get_vq_num_max = ef100_vdpa_get_vq_num_max,
>>>> .get_device_id = ef100_vdpa_get_device_id,
>>>> .get_vendor_id = ef100_vdpa_get_vendor_id,
>>>> + .get_status = ef100_vdpa_get_status,
>>>> + .set_status = ef100_vdpa_set_status,
>>>> + .reset = ef100_vdpa_reset,
>>>> .get_config_size = ef100_vdpa_get_config_size,
>>>> .get_config = ef100_vdpa_get_config,
>>>> .set_config = ef100_vdpa_set_config,
>>>> .get_generation = NULL,
>>>> + .suspend = ef100_vdpa_suspend,
>>>> .free = ef100_vdpa_free,
>>>> };
>>>> --
>>>> 2.30.1
>>>>
>>
>
Powered by blists - more mailing lists