[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210116082331.GL944463@unreal>
Date: Sat, 16 Jan 2021 10:23:31 +0200
From: Leon Romanovsky <leon@...nel.org>
To: Alex Williamson <alex.williamson@...hat.com>
Cc: Bjorn Helgaas <bhelgaas@...gle.com>,
Saeed Mahameed <saeedm@...dia.com>,
Jason Gunthorpe <jgg@...dia.com>,
Jakub Kicinski <kuba@...nel.org>, linux-pci@...r.kernel.org,
linux-rdma@...r.kernel.org, netdev@...r.kernel.org,
Don Dutile <ddutile@...hat.com>,
Alexander Duyck <alexander.duyck@...il.com>
Subject: Re: [PATCH mlx5-next v2 1/5] PCI: Add sysfs callback to allow MSI-X
table size change of SR-IOV VFs
On Thu, Jan 14, 2021 at 05:05:43PM -0700, Alex Williamson wrote:
> On Thu, 14 Jan 2021 12:31:36 +0200
> Leon Romanovsky <leon@...nel.org> wrote:
>
> > From: Leon Romanovsky <leonro@...dia.com>
> >
> > Extend PCI sysfs interface with a new callback that allows configure
> > the number of MSI-X vectors for specific SR-IO VF. This is needed
> > to optimize the performance of newly bound devices by allocating
> > the number of vectors based on the administrator knowledge of targeted VM.
> >
> > This function is applicable for SR-IOV VF because such devices allocate
> > their MSI-X table before they will run on the VMs and HW can't guess the
> > right number of vectors, so the HW allocates them statically and equally.
> >
> > The newly added /sys/bus/pci/devices/.../sriov_vf_msix_count file will be seen
> > for the VFs and it is writable as long as a driver is not bounded to the VF.
> >
> > The values accepted are:
> > * > 0 - this will be number reported by the VF's MSI-X capability
> > * < 0 - not valid
> > * = 0 - will reset to the device default value
> >
> > Signed-off-by: Leon Romanovsky <leonro@...dia.com>
> > ---
> > Documentation/ABI/testing/sysfs-bus-pci | 20 +++++++++
> > drivers/pci/iov.c | 58 +++++++++++++++++++++++++
> > drivers/pci/msi.c | 47 ++++++++++++++++++++
> > drivers/pci/pci-sysfs.c | 1 +
> > drivers/pci/pci.h | 2 +
> > include/linux/pci.h | 3 ++
> > 6 files changed, 131 insertions(+)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> > index 25c9c39770c6..34a8c6bcde70 100644
> > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > @@ -375,3 +375,23 @@ Description:
> > The value comes from the PCI kernel device state and can be one
> > of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold".
> > The file is read only.
> > +
> > +What: /sys/bus/pci/devices/.../sriov_vf_msix_count
> > +Date: December 2020
> > +Contact: Leon Romanovsky <leonro@...dia.com>
> > +Description:
> > + This file is associated with the SR-IOV VFs.
> > + It allows configuration of the number of MSI-X vectors for
> > + the VF. This is needed to optimize performance of newly bound
> > + devices by allocating the number of vectors based on the
> > + administrator knowledge of targeted VM.
> > +
> > + The values accepted are:
> > + * > 0 - this will be number reported by the VF's MSI-X
> > + capability
> > + * < 0 - not valid
> > + * = 0 - will reset to the device default value
> > +
> > + The file is writable if the PF is bound to a driver that
> > + set sriov_vf_total_msix > 0 and there is no driver bound
> > + to the VF.
> > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> > index 4afd4ee4f7f0..5bc496f8ffa3 100644
> > --- a/drivers/pci/iov.c
> > +++ b/drivers/pci/iov.c
> > @@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
> > return (dev->devfn + dev->sriov->offset +
> > dev->sriov->stride * vf_id) & 0xff;
> > }
> > +EXPORT_SYMBOL(pci_iov_virtfn_devfn);
> >
> > /*
> > * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
> > @@ -426,6 +427,63 @@ const struct attribute_group sriov_dev_attr_group = {
> > .is_visible = sriov_attrs_are_visible,
> > };
> >
> > +#ifdef CONFIG_PCI_MSI
> > +static ssize_t sriov_vf_msix_count_show(struct device *dev,
> > + struct device_attribute *attr,
> > + char *buf)
> > +{
> > + struct pci_dev *pdev = to_pci_dev(dev);
> > + int count = pci_msix_vec_count(pdev);
> > +
> > + if (count < 0)
> > + return count;
> > +
> > + return sprintf(buf, "%d\n", count);
> > +}
> > +
> > +static ssize_t sriov_vf_msix_count_store(struct device *dev,
> > + struct device_attribute *attr,
> > + const char *buf, size_t count)
> > +{
> > + struct pci_dev *vf_dev = to_pci_dev(dev);
> > + int val, ret;
> > +
> > + ret = kstrtoint(buf, 0, &val);
> > + if (ret)
> > + return ret;
> > +
> > + ret = pci_set_msix_vec_count(vf_dev, val);
> > + if (ret)
> > + return ret;
> > +
> > + return count;
> > +}
> > +static DEVICE_ATTR_RW(sriov_vf_msix_count);
> > +#endif
> > +
> > +static struct attribute *sriov_vf_dev_attrs[] = {
> > +#ifdef CONFIG_PCI_MSI
> > + &dev_attr_sriov_vf_msix_count.attr,
> > +#endif
> > + NULL,
> > +};
> > +
> > +static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
> > + struct attribute *a, int n)
> > +{
> > + struct device *dev = kobj_to_dev(kobj);
> > +
> > + if (dev_is_pf(dev))
> > + return 0;
>
> Wouldn't it be cleaner to also hide this on VFs where
> pci_msix_vec_count() returns an error or where the PF driver doesn't
> implement .sriov_set_msix_vec_count()? IOW, expose it only where it
> could actually work.
I wasn't sure about the policy in PCI/core, but sure will change.
>
> > +
> > + return a->mode;
> > +}
> > +
> > +const struct attribute_group sriov_vf_dev_attr_group = {
> > + .attrs = sriov_vf_dev_attrs,
> > + .is_visible = sriov_vf_attrs_are_visible,
> > +};
> > +
> > int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> > {
> > return 0;
> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> > index 3162f88fe940..5a40200343c9 100644
> > --- a/drivers/pci/msi.c
> > +++ b/drivers/pci/msi.c
> > @@ -991,6 +991,53 @@ int pci_msix_vec_count(struct pci_dev *dev)
> > }
> > EXPORT_SYMBOL(pci_msix_vec_count);
> >
> > +/**
> > + * pci_set_msix_vec_count - change the reported number of MSI-X vectors
> > + * This function is applicable for SR-IOV VF because such devices allocate
> > + * their MSI-X table before they will run on the VMs and HW can't guess the
> > + * right number of vectors, so the HW allocates them statically and equally.
>
> Nit, this is an assumption of the VF usage and conjecture of the
> implementation.
This is one of the possible implementations.
>
> > + * @dev: VF device that is going to be changed
> > + * @count amount of MSI-X vectors
> > + **/
> > +int pci_set_msix_vec_count(struct pci_dev *dev, int count)
>
> pci_vf_set_msix_vec_count()? Long, I know, but if it's limited to VFs
> name it accordingly. Thanks,
I'll do.
>
> Alex
>
> > +{
> > + struct pci_dev *pdev = pci_physfn(dev);
> > + int ret;
> > +
> > + if (!dev->msix_cap || !pdev->msix_cap || count < 0)
> > + /*
> > + * We don't support negative numbers for now,
> > + * but maybe in the future it will make sense.
> > + */
> > + return -EINVAL;
> > +
> > + device_lock(&pdev->dev);
> > + if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
> > + ret = -EOPNOTSUPP;
> > + goto err_pdev;
> > + }
> > +
> > + device_lock(&dev->dev);
> > + if (dev->driver) {
> > + /*
> > + * Driver already probed this VF and configured itself
> > + * based on previously configured (or default) MSI-X vector
> > + * count. It is too late to change this field for this
> > + * specific VF.
> > + */
> > + ret = -EBUSY;
> > + goto err_dev;
> > + }
> > +
> > + ret = pdev->driver->sriov_set_msix_vec_count(dev, count);
> > +
> > +err_dev:
> > + device_unlock(&dev->dev);
> > +err_pdev:
> > + device_unlock(&pdev->dev);
> > + return ret;
> > +}
> > +
> > static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
> > int nvec, struct irq_affinity *affd, int flags)
> > {
> > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> > index fb072f4b3176..0af2222643c2 100644
> > --- a/drivers/pci/pci-sysfs.c
> > +++ b/drivers/pci/pci-sysfs.c
> > @@ -1557,6 +1557,7 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
> > &pci_dev_hp_attr_group,
> > #ifdef CONFIG_PCI_IOV
> > &sriov_dev_attr_group,
> > + &sriov_vf_dev_attr_group,
> > #endif
> > &pci_bridge_attr_group,
> > &pcie_dev_attr_group,
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 5c59365092fa..dbbfa9e73ea8 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -183,6 +183,7 @@ extern unsigned int pci_pm_d3hot_delay;
> >
> > #ifdef CONFIG_PCI_MSI
> > void pci_no_msi(void);
> > +int pci_set_msix_vec_count(struct pci_dev *dev, int count);
> > #else
> > static inline void pci_no_msi(void) { }
> > #endif
> > @@ -502,6 +503,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
> > void pci_restore_iov_state(struct pci_dev *dev);
> > int pci_iov_bus_range(struct pci_bus *bus);
> > extern const struct attribute_group sriov_dev_attr_group;
> > +extern const struct attribute_group sriov_vf_dev_attr_group;
> > #else
> > static inline int pci_iov_init(struct pci_dev *dev)
> > {
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index b32126d26997..6be96d468eda 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -856,6 +856,8 @@ struct module;
> > * e.g. drivers/net/e100.c.
> > * @sriov_configure: Optional driver callback to allow configuration of
> > * number of VFs to enable via sysfs "sriov_numvfs" file.
> > + * @sriov_set_msix_vec_count: Driver callback to change number of MSI-X vectors
> > + * exposed by the sysfs "vf_msix_vec" entry.
> > * @err_handler: See Documentation/PCI/pci-error-recovery.rst
> > * @groups: Sysfs attribute groups.
> > * @driver: Driver model structure.
> > @@ -871,6 +873,7 @@ struct pci_driver {
> > int (*resume)(struct pci_dev *dev); /* Device woken up */
> > void (*shutdown)(struct pci_dev *dev);
> > int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
> > + int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
> > const struct pci_error_handlers *err_handler;
> > const struct attribute_group **groups;
> > struct device_driver driver;
> > --
> > 2.29.2
> >
>
Powered by blists - more mailing lists