netdev - Re: [RFC PATCH net-next 18/19] pds_vdpa: add support for vdpa and vdpamgmt interfaces

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CACGkMEvhr1MYrf2DyeVbtd5uOC+K6+-0P4DPYndkwJiXxkjsWw@mail.gmail.com>
Date:   Mon, 5 Dec 2022 15:40:43 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Shannon Nelson <shnelson@....com>
Cc:     Shannon Nelson <snelson@...sando.io>, netdev@...r.kernel.org,
        davem@...emloft.net, kuba@...nel.org, mst@...hat.com,
        virtualization@...ts.linux-foundation.org, drivers@...sando.io
Subject: Re: [RFC PATCH net-next 18/19] pds_vdpa: add support for vdpa and
 vdpamgmt interfaces

On Wed, Nov 30, 2022 at 8:11 AM Shannon Nelson <shnelson@....com> wrote:
>
> On 11/21/22 10:32 PM, Jason Wang wrote:
> > On Sat, Nov 19, 2022 at 6:57 AM Shannon Nelson <snelson@...sando.io> wrote:
> >>
> >> This is the vDPA device support, where we advertise that we can
> >> support the virtio queues and deal with the configuration work
> >> through the pds_core's adminq.
> >>
> >> Signed-off-by: Shannon Nelson <snelson@...sando.io>
> >> ---
> >>   drivers/vdpa/pds/Makefile   |   3 +-
> >>   drivers/vdpa/pds/aux_drv.c  |  33 ++
> >>   drivers/vdpa/pds/debugfs.c  | 167 ++++++++
> >>   drivers/vdpa/pds/debugfs.h  |   4 +
> >>   drivers/vdpa/pds/vdpa_dev.c | 796 ++++++++++++++++++++++++++++++++++++
> >>   5 files changed, 1002 insertions(+), 1 deletion(-)
> >>   create mode 100644 drivers/vdpa/pds/vdpa_dev.c
> >>
> >> diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile
> >> index fafd356ddf86..7fde4a4a1620 100644
> >> --- a/drivers/vdpa/pds/Makefile
> >> +++ b/drivers/vdpa/pds/Makefile
> >> @@ -7,4 +7,5 @@ pds_vdpa-y := aux_drv.o \
> >>                cmds.o \
> >>                pci_drv.o \
> >>                debugfs.o \
> >> -             virtio_pci.o
> >> +             virtio_pci.o \
> >> +             vdpa_dev.o
> >> diff --git a/drivers/vdpa/pds/aux_drv.c b/drivers/vdpa/pds/aux_drv.c
> >> index aef3c984dc90..83b9a5a79325 100644
> >> --- a/drivers/vdpa/pds/aux_drv.c
> >> +++ b/drivers/vdpa/pds/aux_drv.c
> >> @@ -12,6 +12,7 @@
> >>   #include <linux/pds/pds_vdpa.h>
> >>
> >>   #include "aux_drv.h"
> >> +#include "vdpa_dev.h"
> >>   #include "pci_drv.h"
> >>   #include "debugfs.h"
> >>
> >> @@ -25,10 +26,25 @@ static void
> >>   pds_vdpa_aux_notify_handler(struct pds_auxiliary_dev *padev,
> >>                              union pds_core_notifyq_comp *event)
> >>   {
> >> +       struct pds_vdpa_device *pdsv = padev->priv;
> >>          struct device *dev = &padev->aux_dev.dev;
> >>          u16 ecode = le16_to_cpu(event->ecode);
> >>
> >>          dev_info(dev, "%s: event code %d\n", __func__, ecode);
> >> +
> >> +       /* Give the upper layers a hint that something interesting
> >> +        * may have happened.  It seems that the only thing this
> >> +        * triggers in the virtio-net drivers above us is a check
> >> +        * of link status.
> >> +        *
> >> +        * We don't set the NEEDS_RESET flag for EVENT_RESET
> >> +        * because we're likely going through a recovery or
> >> +        * fw_update and will be back up and running soon.
> >> +        */
> >> +       if (ecode == PDS_EVENT_RESET || ecode == PDS_EVENT_LINK_CHANGE) {
> >> +               if (pdsv->hw.config_cb.callback)
> >> +                       pdsv->hw.config_cb.callback(pdsv->hw.config_cb.private);
> >> +       }
> >>   }
> >>
> >>   static int
> >> @@ -80,10 +96,25 @@ pds_vdpa_aux_probe(struct auxiliary_device *aux_dev,
> >>                  goto err_register_client;
> >>          }
> >>
> >> +       /* Get device ident info and set up the vdpa_mgmt_dev */
> >> +       err = pds_vdpa_get_mgmt_info(vdpa_aux);
> >> +       if (err)
> >> +               goto err_register_client;
> >> +
> >> +       /* Let vdpa know that we can provide devices */
> >> +       err = vdpa_mgmtdev_register(&vdpa_aux->vdpa_mdev);
> >> +       if (err) {
> >> +               dev_err(dev, "%s: Failed to initialize vdpa_mgmt interface: %pe\n",
> >> +                       __func__, ERR_PTR(err));
> >> +               goto err_mgmt_reg;
> >> +       }
> >> +
> >>          pds_vdpa_debugfs_add_ident(vdpa_aux);
> >>
> >>          return 0;
> >>
> >> +err_mgmt_reg:
> >> +       padev->ops->unregister_client(padev);
> >>   err_register_client:
> >>          auxiliary_set_drvdata(aux_dev, NULL);
> >>   err_invalid_driver:
> >> @@ -98,6 +129,8 @@ pds_vdpa_aux_remove(struct auxiliary_device *aux_dev)
> >>          struct pds_vdpa_aux *vdpa_aux = auxiliary_get_drvdata(aux_dev);
> >>          struct device *dev = &aux_dev->dev;
> >>
> >> +       vdpa_mgmtdev_unregister(&vdpa_aux->vdpa_mdev);
> >> +
> >>          vdpa_aux->padev->ops->unregister_client(vdpa_aux->padev);
> >>          if (vdpa_aux->vdpa_vf)
> >>                  pci_dev_put(vdpa_aux->vdpa_vf->pdev);
> >> diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
> >> index f766412209df..aa3143126a7e 100644
> >> --- a/drivers/vdpa/pds/debugfs.c
> >> +++ b/drivers/vdpa/pds/debugfs.c
> >> @@ -11,6 +11,7 @@
> >>   #include <linux/pds/pds_auxbus.h>
> >>   #include <linux/pds/pds_vdpa.h>
> >>
> >> +#include "vdpa_dev.h"
> >>   #include "aux_drv.h"
> >>   #include "pci_drv.h"
> >>   #include "debugfs.h"
> >> @@ -19,6 +20,72 @@
> >>
> >>   static struct dentry *dbfs_dir;
> >>
> >> +#define PRINT_SBIT_NAME(__seq, __f, __name)                     \
> >> +       do {                                                    \
> >> +               if (__f & __name)                               \
> >> +                       seq_printf(__seq, " %s", &#__name[16]); \
> >> +       } while (0)
> >> +
> >> +static void
> >> +print_status_bits(struct seq_file *seq, u16 status)
> >> +{
> >> +       seq_puts(seq, "status:");
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_ACKNOWLEDGE);
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER);
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_DRIVER_OK);
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FEATURES_OK);
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_NEEDS_RESET);
> >> +       PRINT_SBIT_NAME(seq, status, VIRTIO_CONFIG_S_FAILED);
> >> +       seq_puts(seq, "\n");
> >> +}
> >> +
> >> +#define PRINT_FBIT_NAME(__seq, __f, __name)                \
> >> +       do {                                               \
> >> +               if (__f & BIT_ULL(__name))                 \
> >> +                       seq_printf(__seq, " %s", #__name); \
> >> +       } while (0)
> >> +
> >> +static void
> >> +print_feature_bits(struct seq_file *seq, u64 features)
> >> +{
> >> +       seq_puts(seq, "features:");
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CSUM);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_CSUM);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_MTU);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_MAC);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_TSO4);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_TSO6);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_ECN);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_UFO);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_HOST_TSO4);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_HOST_TSO6);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_HOST_ECN);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_HOST_UFO);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_MRG_RXBUF);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_STATUS);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_VQ);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_RX);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_VLAN);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_RX_EXTRA);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_GUEST_ANNOUNCE);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_MQ);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_CTRL_MAC_ADDR);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_HASH_REPORT);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_RSS);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_RSC_EXT);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_STANDBY);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_NET_F_SPEED_DUPLEX);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_NOTIFY_ON_EMPTY);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_ANY_LAYOUT);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_VERSION_1);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_ACCESS_PLATFORM);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_RING_PACKED);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_ORDER_PLATFORM);
> >> +       PRINT_FBIT_NAME(seq, features, VIRTIO_F_SR_IOV);
> >> +       seq_puts(seq, "\n");
> >> +}
> >> +
> >>   void
> >>   pds_vdpa_debugfs_create(void)
> >>   {
> >> @@ -49,10 +116,18 @@ static int
> >>   identity_show(struct seq_file *seq, void *v)
> >>   {
> >>          struct pds_vdpa_aux *vdpa_aux = seq->private;
> >> +       struct vdpa_mgmt_dev *mgmt;
> >>
> >>          seq_printf(seq, "aux_dev:            %s\n",
> >>                     dev_name(&vdpa_aux->padev->aux_dev.dev));
> >>
> >> +       mgmt = &vdpa_aux->vdpa_mdev;
> >> +       seq_printf(seq, "max_vqs:            %d\n", mgmt->max_supported_vqs);
> >> +       seq_printf(seq, "config_attr_mask:   %#llx\n", mgmt->config_attr_mask);
> >> +       seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features);
> >> +       print_feature_bits(seq, mgmt->supported_features);
> >> +       seq_printf(seq, "local_mac_bit:      %d\n", vdpa_aux->local_mac_bit);
> >> +
> >>          return 0;
> >>   }
> >>   DEFINE_SHOW_ATTRIBUTE(identity);
> >> @@ -64,4 +139,96 @@ pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux)
> >>                              vdpa_aux, &identity_fops);
> >>   }
> >>
> >> +static int
> >> +config_show(struct seq_file *seq, void *v)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = seq->private;
> >> +       struct virtio_net_config *vc = &pdsv->vn_config;
> >> +
> >> +       seq_printf(seq, "mac:                  %pM\n", vc->mac);
> >> +       seq_printf(seq, "max_virtqueue_pairs:  %d\n",
> >> +                  __virtio16_to_cpu(true, vc->max_virtqueue_pairs));
> >> +       seq_printf(seq, "mtu:                  %d\n", __virtio16_to_cpu(true, vc->mtu));
> >> +       seq_printf(seq, "speed:                %d\n", le32_to_cpu(vc->speed));
> >> +       seq_printf(seq, "duplex:               %d\n", vc->duplex);
> >> +       seq_printf(seq, "rss_max_key_size:     %d\n", vc->rss_max_key_size);
> >> +       seq_printf(seq, "rss_max_indirection_table_length: %d\n",
> >> +                  le16_to_cpu(vc->rss_max_indirection_table_length));
> >> +       seq_printf(seq, "supported_hash_types: %#x\n",
> >> +                  le32_to_cpu(vc->supported_hash_types));
> >> +       seq_printf(seq, "vn_status:            %#x\n",
> >> +                  __virtio16_to_cpu(true, vc->status));
> >> +       print_status_bits(seq, __virtio16_to_cpu(true, vc->status));
> >> +
> >> +       seq_printf(seq, "hw_status:            %#x\n", pdsv->hw.status);
> >> +       print_status_bits(seq, pdsv->hw.status);
> >> +       seq_printf(seq, "req_features:         %#llx\n", pdsv->hw.req_features);
> >> +       print_feature_bits(seq, pdsv->hw.req_features);
> >> +       seq_printf(seq, "actual_features:      %#llx\n", pdsv->hw.actual_features);
> >> +       print_feature_bits(seq, pdsv->hw.actual_features);
> >> +       seq_printf(seq, "vdpa_index:           %d\n", pdsv->hw.vdpa_index);
> >> +       seq_printf(seq, "num_vqs:              %d\n", pdsv->hw.num_vqs);
> >> +
> >> +       return 0;
> >> +}
> >> +DEFINE_SHOW_ATTRIBUTE(config);
> >> +
> >> +static int
> >> +vq_show(struct seq_file *seq, void *v)
> >> +{
> >> +       struct pds_vdpa_vq_info *vq = seq->private;
> >> +       struct pds_vdpa_intr_info *intrs;
> >> +
> >> +       seq_printf(seq, "ready:      %d\n", vq->ready);
> >> +       seq_printf(seq, "desc_addr:  %#llx\n", vq->desc_addr);
> >> +       seq_printf(seq, "avail_addr: %#llx\n", vq->avail_addr);
> >> +       seq_printf(seq, "used_addr:  %#llx\n", vq->used_addr);
> >> +       seq_printf(seq, "q_len:      %d\n", vq->q_len);
> >> +       seq_printf(seq, "qid:        %d\n", vq->qid);
> >> +
> >> +       seq_printf(seq, "doorbell:   %#llx\n", vq->doorbell);
> >> +       seq_printf(seq, "avail_idx:  %d\n", vq->avail_idx);
> >> +       seq_printf(seq, "used_idx:   %d\n", vq->used_idx);
> >> +       seq_printf(seq, "intr_index: %d\n", vq->intr_index);
> >> +
> >> +       intrs = vq->pdsv->vdpa_aux->vdpa_vf->intrs;
> >> +       seq_printf(seq, "irq:        %d\n", intrs[vq->intr_index].irq);
> >> +       seq_printf(seq, "irq-name:   %s\n", intrs[vq->intr_index].name);
> >> +
> >> +       seq_printf(seq, "hw_qtype:   %d\n", vq->hw_qtype);
> >> +       seq_printf(seq, "hw_qindex:  %d\n", vq->hw_qindex);
> >> +
> >> +       return 0;
> >> +}
> >> +DEFINE_SHOW_ATTRIBUTE(vq);
> >> +
> >> +void
> >> +pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_device *pdsv)
> >> +{
> >> +       struct dentry *dentry;
> >> +       const char *name;
> >> +       int i;
> >> +
> >> +       dentry = pdsv->vdpa_aux->vdpa_vf->dentry;
> >> +       name = dev_name(&pdsv->vdpa_dev.dev);
> >> +
> >> +       pdsv->dentry = debugfs_create_dir(name, dentry);
> >> +
> >> +       debugfs_create_file("config", 0400, pdsv->dentry, pdsv, &config_fops);
> >> +
> >> +       for (i = 0; i < pdsv->hw.num_vqs; i++) {
> >> +               char name[8];
> >> +
> >> +               snprintf(name, sizeof(name), "vq%02d", i);
> >> +               debugfs_create_file(name, 0400, pdsv->dentry, &pdsv->hw.vqs[i], &vq_fops);
> >> +       }
> >> +}
> >> +
> >> +void
> >> +pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_device *pdsv)
> >> +{
> >> +       debugfs_remove_recursive(pdsv->dentry);
> >> +       pdsv->dentry = NULL;
> >> +}
> >> +
> >>   #endif /* CONFIG_DEBUG_FS */
> >> diff --git a/drivers/vdpa/pds/debugfs.h b/drivers/vdpa/pds/debugfs.h
> >> index 939a4c248aac..f0567e4ee4e4 100644
> >> --- a/drivers/vdpa/pds/debugfs.h
> >> +++ b/drivers/vdpa/pds/debugfs.h
> >> @@ -13,12 +13,16 @@ void pds_vdpa_debugfs_destroy(void);
> >>   void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_pci_device *vdpa_pdev);
> >>   void pds_vdpa_debugfs_del_pcidev(struct pds_vdpa_pci_device *vdpa_pdev);
> >>   void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux);
> >> +void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_device *pdsv);
> >> +void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_device *pdsv);
> >>   #else
> >>   static inline void pds_vdpa_debugfs_create(void) { }
> >>   static inline void pds_vdpa_debugfs_destroy(void) { }
> >>   static inline void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_pci_device *vdpa_pdev) { }
> >>   static inline void pds_vdpa_debugfs_del_pcidev(struct pds_vdpa_pci_device *vdpa_pdev) { }
> >>   static inline void pds_vdpa_debugfs_add_ident(struct pds_vdpa_aux *vdpa_aux) { }
> >> +static inline void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_device *pdsv) { }
> >> +static inline void pds_vdpa_debugfs_del_vdpadev(struct pds_vdpa_device *pdsv) { }
> >>   #endif
> >>
> >>   #endif /* _PDS_VDPA_DEBUGFS_H_ */
> >> diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c
> >> new file mode 100644
> >> index 000000000000..824be42aff0d
> >> --- /dev/null
> >> +++ b/drivers/vdpa/pds/vdpa_dev.c
> >> @@ -0,0 +1,796 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/* Copyright(c) 2022 Pensando Systems, Inc */
> >> +
> >> +#include <linux/interrupt.h>
> >> +#include <linux/module.h>
> >> +#include <linux/pci.h>
> >> +#include <linux/sysfs.h>
> >> +#include <linux/types.h>
> >> +#include <linux/vdpa.h>
> >> +#include <uapi/linux/virtio_pci.h>
> >> +#include <uapi/linux/vdpa.h>
> >> +
> >> +#include <linux/pds/pds_intr.h>
> >> +#include <linux/pds/pds_core_if.h>
> >> +#include <linux/pds/pds_adminq.h>
> >> +#include <linux/pds/pds_auxbus.h>
> >> +#include <linux/pds/pds_vdpa.h>
> >> +
> >> +#include "vdpa_dev.h"
> >> +#include "pci_drv.h"
> >> +#include "aux_drv.h"
> >> +#include "pci_drv.h"
> >> +#include "cmds.h"
> >> +#include "debugfs.h"
> >> +
> >> +static int
> >> +pds_vdpa_setup_driver(struct pds_vdpa_device *pdsv)
> >> +{
> >> +       struct device *dev = &pdsv->vdpa_dev.dev;
> >> +       int err = 0;
> >> +       int i;
> >> +
> >> +       /* Verify all vqs[] are in ready state */
> >> +       for (i = 0; i < pdsv->hw.num_vqs; i++) {
> >> +               if (!pdsv->hw.vqs[i].ready) {
> >> +                       dev_warn(dev, "%s: qid %d not ready\n", __func__, i);
> >> +                       err = -ENOENT;
> >> +               }
> >> +       }
> >> +
> >> +       return err;
> >> +}
> >> +
> >> +static struct pds_vdpa_device *
> >> +vdpa_to_pdsv(struct vdpa_device *vdpa_dev)
> >> +{
> >> +       return container_of(vdpa_dev, struct pds_vdpa_device, vdpa_dev);
> >> +}
> >> +
> >> +static struct pds_vdpa_hw *
> >> +vdpa_to_hw(struct vdpa_device *vdpa_dev)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +
> >> +       return &pdsv->hw;
> >> +}
> >> +
> >> +static int
> >> +pds_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid,
> >> +                       u64 desc_addr, u64 driver_addr, u64 device_addr)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       hw->vqs[qid].desc_addr = desc_addr;
> >> +       hw->vqs[qid].avail_addr = driver_addr;
> >> +       hw->vqs[qid].used_addr = device_addr;
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static void
> >> +pds_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, u32 num)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       hw->vqs[qid].q_len = num;
> >> +}
> >> +
> >> +static void
> >> +pds_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +
> >> +       iowrite16(qid, pdsv->hw.vqs[qid].notify);
> >> +}
> >> +
> >> +static void
> >> +pds_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
> >> +                  struct vdpa_callback *cb)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       hw->vqs[qid].event_cb = *cb;
> >> +}
> >> +
> >> +static irqreturn_t
> >> +pds_vdpa_isr(int irq, void *data)
> >> +{
> >> +       struct pds_core_intr __iomem *intr_ctrl;
> >> +       struct pds_vdpa_device *pdsv;
> >> +       struct pds_vdpa_vq_info *vq;
> >> +
> >> +       vq = data;
> >> +       pdsv = vq->pdsv;
> >> +
> >> +       if (vq->event_cb.callback)
> >> +               vq->event_cb.callback(vq->event_cb.private);
> >> +
> >> +       /* Since we don't actually know how many vq descriptors are
> >> +        * covered in this interrupt cycle, we simply clean all the
> >> +        * credits and re-enable the interrupt.
> >> +        */
> >> +       intr_ctrl = (struct pds_core_intr __iomem *)pdsv->vdpa_aux->vdpa_vf->vd_mdev.isr;
> >> +       pds_core_intr_clean_flags(&intr_ctrl[vq->intr_index],
> >> +                                 PDS_CORE_INTR_CRED_REARM);
> >> +
> >> +       return IRQ_HANDLED;
> >> +}
> >> +
> >> +static void
> >> +pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid)
> >> +{
> >> +       struct pds_vdpa_intr_info *intrs = pdsv->vdpa_aux->vdpa_vf->intrs;
> >> +       struct pci_dev *pdev = pdsv->vdpa_aux->vdpa_vf->pdev;
> >> +       struct pds_core_intr __iomem *intr_ctrl;
> >> +       int index;
> >> +
> >> +       intr_ctrl = (struct pds_core_intr __iomem *)pdsv->vdpa_aux->vdpa_vf->vd_mdev.isr;
> >> +       index = pdsv->hw.vqs[qid].intr_index;
> >> +       if (index == VIRTIO_MSI_NO_VECTOR)
> >> +               return;
> >> +
> >> +       if (intrs[index].irq == VIRTIO_MSI_NO_VECTOR)
> >> +               return;
> >> +
> >> +       if (qid & 0x1) {
> >> +               pdsv->hw.vqs[qid].intr_index = VIRTIO_MSI_NO_VECTOR;
> >> +       } else {
> >> +               pds_core_intr_mask(&intr_ctrl[index], PDS_CORE_INTR_MASK_SET);
> >> +               devm_free_irq(&pdev->dev, intrs[index].irq, &pdsv->hw.vqs[qid]);
> >> +               pdsv->hw.vqs[qid].intr_index = VIRTIO_MSI_NO_VECTOR;
> >> +               intrs[index].irq = VIRTIO_MSI_NO_VECTOR;
> >> +       }
> >> +}
> >> +
> >> +static void
> >> +pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +       struct pci_dev *pdev = pdsv->vdpa_aux->vdpa_vf->pdev;
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +       struct device *dev = &pdsv->vdpa_dev.dev;
> >> +       struct pds_core_intr __iomem *intr_ctrl;
> >> +       int err;
> >> +
> >> +       dev_dbg(dev, "%s: qid %d ready %d => %d\n",
> >> +                __func__, qid, hw->vqs[qid].ready, ready);
> >> +       if (ready == hw->vqs[qid].ready)
> >> +               return;
> >> +
> >> +       intr_ctrl = (struct pds_core_intr __iomem *)pdsv->vdpa_aux->vdpa_vf->vd_mdev.isr;
> >
> > It looks to me pds has a different layout/semantic for isr than virtio
> > spec. I'd suggest to not reuse spec isr here to avoid confusion.
>
> Hmm, yes, that needs some straightening out.
>
> >
> >> +       if (ready) {
> >
> > Spec said no interrupt before DRIVER_OK, it looks more simple if we
> > move the interrupt setup to set_status():
> >
> > E.g we can know if we have sufficient vectors and use different
> > mapping policies in advance.
>
> I'll look at that.
>
> >
> >> +               struct pds_vdpa_intr_info *intrs = pdsv->vdpa_aux->vdpa_vf->intrs;
> >> +               int index = VIRTIO_MSI_NO_VECTOR;
> >> +               int i;
> >> +
> >> +               /*  Tx and Rx queues share interrupts, and they start with
> >> +                *  even numbers, so only find an interrupt for the even numbered
> >> +                *  qid, and let the odd number use what the previous queue got.
> >> +                */
> >> +               if (qid & 0x1) {
> >> +                       int even = qid & ~0x1;
> >> +
> >> +                       index = hw->vqs[even].intr_index;
> >> +               } else {
> >> +                       for (i = 0; i < pdsv->vdpa_aux->vdpa_vf->nintrs; i++) {
> >> +                               if (intrs[i].irq == VIRTIO_MSI_NO_VECTOR) {
> >> +                                       index = i;
> >> +                                       break;
> >> +                               }
> >> +                       }
> >> +               }
> >> +
> >> +               if (qid & 0x1) {
> >> +                       hw->vqs[qid].intr_index = index;
> >> +               } else if (index != VIRTIO_MSI_NO_VECTOR) {
> >> +                       int irq;
> >> +
> >> +                       irq = pci_irq_vector(pdev, index);
> >> +                       snprintf(intrs[index].name, sizeof(intrs[index].name),
> >> +                                "vdpa-%s-%d", dev_name(dev), index);
> >> +
> >> +                       err = devm_request_irq(&pdev->dev, irq, pds_vdpa_isr, 0,
> >> +                                              intrs[index].name, &hw->vqs[qid]);
> >> +                       if (err) {
> >> +                               dev_info(dev, "%s: no irq for qid %d: %pe\n",
> >> +                                        __func__, qid, ERR_PTR(err));
> >
> > Should we fail?
> >
> >> +                       } else {
> >> +                               intrs[index].irq = irq;
> >> +                               hw->vqs[qid].intr_index = index;
> >> +                               pds_core_intr_mask(&intr_ctrl[index],
> >> +                                                  PDS_CORE_INTR_MASK_CLEAR);
> >
> > I guess the reason that you don't simply use VF MSI-X is the DPU can
> > support vDPA subdevice in the future?
> >
> >> +                       }
> >> +               } else {
> >> +                       dev_info(dev, "%s: no intr slot for qid %d\n",
> >> +                                __func__, qid);
> >
> > Do we need to fail here?
> >
> >> +               }
> >> +
> >> +               /* Pass vq setup info to DSC */
> >> +               err = pds_vdpa_cmd_init_vq(pdsv, qid, &hw->vqs[qid]);
> >> +               if (err) {
> >> +                       pds_vdpa_release_irq(pdsv, qid);
> >> +                       ready = false;
> >> +               }
> >> +       } else {
> >> +               pds_vdpa_release_irq(pdsv, qid);
> >> +               (void) pds_vdpa_cmd_reset_vq(pdsv, qid);
> >> +       }
> >> +
> >> +       hw->vqs[qid].ready = ready;
> >> +}
> >> +
> >> +static bool
> >> +pds_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       return hw->vqs[qid].ready;
> >> +}
> >> +
> >> +static int
> >> +pds_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
> >> +                     const struct vdpa_vq_state *state)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       hw->vqs[qid].used_idx = state->split.avail_index;
> >> +       hw->vqs[qid].avail_idx = state->split.avail_index;
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static int
> >> +pds_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
> >> +                     struct vdpa_vq_state *state)
> >> +{
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +
> >> +       state->split.avail_index = hw->vqs[qid].avail_idx;
> >
> > Who is in charge of reading avail_idx from the hardware?
>
> We didn't have that available in the early FW, so it isn't here yet.
> Work in progerss.
>
> >
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static struct vdpa_notification_area
> >> +pds_vdpa_get_vq_notification(struct vdpa_device *vdpa_dev, u16 qid)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +       struct virtio_pci_modern_device *vd_mdev;
> >> +       struct vdpa_notification_area area;
> >> +
> >> +       area.addr = hw->vqs[qid].notify_pa;
> >> +
> >> +       vd_mdev = &pdsv->vdpa_aux->vdpa_vf->vd_mdev;
> >> +       if (!vd_mdev->notify_offset_multiplier)
> >> +               area.size = PAGE_SIZE;
> >> +       else
> >> +               area.size = vd_mdev->notify_offset_multiplier;
> >> +
> >> +       return area;
> >> +}
> >> +
> >> +static int
> >> +pds_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, u16 qid)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +       int irq = VIRTIO_MSI_NO_VECTOR;
> >> +       int index;
> >> +
> >> +       if (pdsv->vdpa_aux->vdpa_vf->intrs) {
> >> +               index = hw->vqs[qid].intr_index;
> >> +               irq = pdsv->vdpa_aux->vdpa_vf->intrs[index].irq;
> >
> > The notification area mapping might only work well when each vq has
> > it's own irq. Otherwise guest may see spurious interrupt which may
> > degrade the performance.
>
> We haven't been expecting to use shared interrupts - are we being overly
> optimistic?

So at least from the codes above, I think we may end up with e.g two
queues that are using the same irq? And the comment said:

               /*  Tx and Rx queues share interrupts, and they start with
                 *  even numbers, so only find an interrupt for the
even numbered
                 *  qid, and let the odd number use what the previous queue got.
                 */
                if (qid & 0x1) {
                        int even = qid & ~0x1;

                index = hw->vqs[even].intr_index;

It said TX and RX share interrupts.

>
>
> >
> >> +       }
> >> +
> >> +       return irq;
> >> +}
> >> +
> >> +static u32
> >> +pds_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
> >> +{
> >> +
> >> +       return PAGE_SIZE;
> >> +}
> >> +
> >> +static u32
> >> +pds_vdpa_get_vq_group(struct vdpa_device *vdpa_dev, u16 idx)
> >> +{
> >> +       return 0;
> >> +}
> >> +
> >> +static u64
> >> +pds_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +
> >> +       return le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
> >> +}
> >> +
> >> +static int
> >> +pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features)
> >> +{
> >> +       struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
> >> +       struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);
> >> +       struct device *dev = &pdsv->vdpa_dev.dev;
> >> +       u64 nego_features;
> >> +       u64 set_features;
> >> +       u64 missing;
> >> +       int err;
> >> +
> >> +       if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
> >> +               dev_err(dev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
> >> +               return -EOPNOTSUPP;
> >
> > Should we fail the FEATURE_OK in this case and all the other below
> > error conditions?
>
> Perhaps I'm missing a nuance in the inteface... isn't that what we're
> doing by returning a non-zero status?

Kind of, but to be compliant with the spec, the subsequent get_feature
should return status without FEATURE_OK, I'm not sure this can be
guaranteed:

static u8
pds_vdpa_get_status(struct vdpa_device *vdpa_dev)
{
        struct pds_vdpa_hw *hw = vdpa_to_hw(vdpa_dev);

      return hw->status;
}

> >> +                       dev_warn(dev, "Known FW issue - overriding to use max_vq_pairs %d\n",
> >> +                                hw->num_vqs / 2);
> >
> > Should we fail here? Since the device has a different max_vqp that expected.
>
> Wasn't sure if we should annoy users with a fail here, or try to adjust
> and continue on with something that should work.

I think it's better to fail since it's the behaviour of other vDPA
devices and software virtio devices.

Thanks