lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1493883171.10737.12.camel@ni.com>
Date:   Thu, 4 May 2017 07:32:54 +0000
From:   Vee Khee Wong <vee.khee.wong@...com>
To:     "bhelgaas@...gle.com" <bhelgaas@...gle.com>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Jonathan Hearn <jonathan.hearn@...com>,
        "Ram.Amrani@...ium.com" <Ram.Amrani@...ium.com>,
        Hui Chun Ong <hui.chun.ong@...com>,
        "shhuiw@...mail.com" <shhuiw@...mail.com>,
        "rajatja@...gle.com" <rajatja@...gle.com>,
        "linux-pci@...r.kernel.org" <linux-pci@...r.kernel.org>,
        "keith.busch@...el.com" <keith.busch@...el.com>,
        "jonathan.yong@...el.com" <jonathan.yong@...el.com>
Subject: Re: [PATCH] pci-sysfs: Make PCI bridge attribute visible in sysfs



On Mon, 2017-04-17 at 10:41 -0500, Bjorn Helgaas wrote:
> On Mon, Apr 17, 2017 at 12:50 AM, Wong Vee Khee <vee.khee.wong@...com
> > wrote:
> > 
> > From: vwong <vee.khee.wong@...com>
> > 
> > Export the PCIe link attributes of PCI bridges to sysfs.
> This needs justification for *why* we should export these via sysfs.
> 
> Some of these things, e.g., secondary/subordinate bus numbers, are
> already visible to non-privileged users via "lspci -v".
> 
We need to expose these attributes via sysfs due to several reasons
listed below:

1) PCIe capabilities info such as Maximum/Actual link width and link speed only visible to privileged users via "lspci -vvv". The software that my team is working on need to get PCIe link information as non-root user.

2) From a client perspective, it require a lot of overhead parsing output of lspci to get PCIe capabilities. Our application is only interested in getting PCIe bridges but lspci print info for all PCIe devices.

> > 
> > Signed-off-by: Wong Vee Khee <vee.khee.wong@...com>
> > Signed-off-by: Hui Chun Ong <hui.chun.ong@...com>
> > ---
> >  drivers/pci/pci-sysfs.c       | 197
> > +++++++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/pci_regs.h |   4 +
> >  2 files changed, 197 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> > index 25d010d..a218c43 100644
> > --- a/drivers/pci/pci-sysfs.c
> > +++ b/drivers/pci/pci-sysfs.c
> > @@ -154,6 +154,127 @@ static ssize_t resource_show(struct device
> > *dev, struct device_attribute *attr,
> >  }
> >  static DEVICE_ATTR_RO(resource);
> > 
> > +static ssize_t max_link_speed_show(struct device *dev,
> > +                                  struct device_attribute *attr,
> > char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u32 linkcap;
> > +       int err;
> > +       const char *speed;
> > +
> > +       err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP,
> > &linkcap);
> > +
> > +       if (!err && linkcap) {
>   if (err)
>     return -EINVAL;
> 
> I don't think there's a reason to test "linkcap" here.  Per spec,
> zero
> is not a valid value of LNKCAP, so if we got a zero, I think showing
> "Unknown speed" would be fine.
> 
> > 
> > +               switch (linkcap & PCI_EXP_LNKCAP_MLS) {
> > +               case PCI_EXP_LNKCAP_MLS_8_0GB:
> > +                       speed = "8 GT/s";
> > +                       break;
> > +               case PCI_EXP_LNKCAP_MLS_5_0GB:
> > +                       speed = "5 GT/s";
> > +                       break;
> > +               case PCI_EXP_LNKCAP_MLS_2_5GB:
> > +                       speed = "2.5 GT/s";
> > +                       break;
> > +               default:
> > +                       speed = "Unknown speed";
> > +               }
> > +
> > +               return sprintf(buf, "%s\n", speed);
> > +       }
> > +
> > +       return -EINVAL;
> > +}
> > +static DEVICE_ATTR_RO(max_link_speed);
> > +
> > +static ssize_t max_link_width_show(struct device *dev,
> > +                                  struct device_attribute *attr,
> > char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u32 linkcap;
> > +       int err;
> > +
> > +       err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP,
> > &linkcap);
> > +
> > +       return err ? -EINVAL : sprintf(
> > +               buf, "%u\n", (linkcap & PCI_EXP_LNKCAP_MLW) >> 4);
>   if (err)
>     return -EINVAL;
> 
>   return sprintf(...);
> 
> > 
> > +}
> > +static DEVICE_ATTR_RO(max_link_width);
> > +
> > +static ssize_t current_link_speed_show(struct device *dev,
> > +                                      struct device_attribute
> > *attr, char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u16 linkstat;
> > +       int err;
> > +       const char *speed;
> > +
> > +       err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA,
> > &linkstat);
> > +
> > +       if (!err && linkstat) {
> See max_link_speed above.
> 
> > 
> > +               switch (linkstat & PCI_EXP_LNKSTA_CLS) {
> > +               case PCI_EXP_LNKSTA_CLS_8_0GB:
> > +                       speed = "8 GT/s";
> > +                       break;
> > +               case PCI_EXP_LNKSTA_CLS_5_0GB:
> > +                       speed = "5 GT/s";
> > +                       break;
> > +               case PCI_EXP_LNKSTA_CLS_2_5GB:
> > +                       speed = "2.5 GT/s";
> > +                       break;
> > +               default:
> > +                       speed = "Unknown speed";
> > +               }
> > +
> > +               return sprintf(buf, "%s\n", speed);
> > +       }
> > +
> > +       return -EINVAL;
> > +}
> > +static DEVICE_ATTR_RO(current_link_speed);
> > +
> > +static ssize_t current_link_width_show(struct device *dev,
> > +                                      struct device_attribute
> > *attr, char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u16 linkstat;
> > +       int err;
> > +
> > +       err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA,
> > &linkstat);
> > +
> > +       return err ? -EINVAL : sprintf(
> > +               buf, "%u\n",
> > +               (linkstat & PCI_EXP_LNKSTA_NLW) >>
> > PCI_EXP_LNKSTA_NLW_SHIFT);
> > +}
> > +static DEVICE_ATTR_RO(current_link_width);
> > +
> > +static ssize_t secondary_bus_number_show(struct device *dev,
> > +                                        struct device_attribute
> > *attr,
> > +                                        char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u8 sec_bus;
> > +       int err;
> > +
> > +       err = pci_read_config_byte(pci_dev, PCI_SECONDARY_BUS,
> > &sec_bus);
> There is already a /sys/devices/pciDDDD:BB/DDDD:BB:dd.f/<secondary>/
> directory and a .../pci_bus/ directory that looks like it is the
> secondary bus.  Is that enough?
> 
> If we also need this file, I would like it to do something sensible
> when there is no secondary bus.  Maybe that is exposing the bus
> numbers directly, or maybe it is something else.  I tend to think
> that
> if you just want the register contents, lspci is enough, and if you
> need something in sysfs, it should be a little more digested, e.g.,
> the existing subdirectory.
> 
> It'd be helpful to know something about how you want to use this.
> 
> > 
> > +       return err ? -EINVAL : sprintf(buf, "%u\n", sec_bus);
> > +}
> > +static DEVICE_ATTR_RO(secondary_bus_number);
> > +
> > +static ssize_t subordinate_bus_number_show(struct device *dev,
> > +                                          struct device_attribute
> > *attr,
> > +                                          char *buf)
> > +{
> > +       struct pci_dev *pci_dev = to_pci_dev(dev);
> > +       u8 sub_bus;
> > +       int err;
> > +
> > +       err = pci_read_config_byte(pci_dev, PCI_SUBORDINATE_BUS,
> > &sub_bus);
> > +
> > +       return err ? -EINVAL : sprintf(buf, "%u\n", sub_bus);
> > +}
> > +static DEVICE_ATTR_RO(subordinate_bus_number);
> > +
> >  static ssize_t modalias_show(struct device *dev, struct
> > device_attribute *attr,
> >                              char *buf)
> >  {
> > @@ -602,12 +723,17 @@ static struct attribute *pci_dev_attrs[] = {
> >         NULL,
> >  };
> > 
> > -static const struct attribute_group pci_dev_group = {
> > -       .attrs = pci_dev_attrs,
> > +static struct attribute *pci_bridge_attrs[] = {
> > +       &dev_attr_subordinate_bus_number.attr,
> > +       &dev_attr_secondary_bus_number.attr,
> > +       NULL,
> >  };
> > 
> > -const struct attribute_group *pci_dev_groups[] = {
> > -       &pci_dev_group,
> > +static struct attribute *pcie_dev_attrs[] = {
> > +       &dev_attr_current_link_speed.attr,
> > +       &dev_attr_current_link_width.attr,
> > +       &dev_attr_max_link_width.attr,
> > +       &dev_attr_max_link_speed.attr,
> >         NULL,
> >  };
> > 
> > @@ -1540,6 +1666,57 @@ static umode_t
> > pci_dev_hp_attrs_are_visible(struct kobject *kobj,
> >         return a->mode;
> >  }
> > 
> > +static umode_t pci_bridge_attrs_are_visible(struct kobject *kobj,
> > +                                           struct attribute *a,
> > int n)
> > +{
> > +       struct device *dev = kobj_to_dev(kobj);
> > +       struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +       if (!pci_is_bridge(pdev))
> > +               return 0;
>   if (pci_is_bridge(pdev))
>     return a->mode;
> 
>   return 0;
> 
> I think it's easier to read without the negation.  Possibly that's
> just my personal preference :)
> 
> > 
> > +
> > +       return a->mode;
> > +}
> > +
> > +static umode_t pcie_dev_attrs_are_visible(struct kobject *kobj,
> > +                                         struct attribute *a, int
> > n)
> > +{
> > +       struct device *dev = kobj_to_dev(kobj);
> > +       struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +       if (pci_find_capability(pdev, PCI_CAP_ID_EXP) == 0)
> Use pci_is_pcie().
> 
> > 
> > +               return 0;
> > +
> > +       return a->mode;
> > +}
> > +
> > +static const struct attribute_group pci_dev_group = {
> > +       .attrs = pci_dev_attrs,
> > +};
> > +
> > +const struct attribute_group *pci_dev_groups[] = {
> > +       &pci_dev_group,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group pci_bridge_group = {
> > +       .attrs = pci_bridge_attrs,
> > +};
> > +
> > +const struct attribute_group *pci_bridge_groups[] = {
> > +       &pci_bridge_group,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group pcie_dev_group = {
> > +       .attrs = pcie_dev_attrs,
> > +};
> > +
> > +const struct attribute_group *pcie_dev_groups[] = {
> > +       &pcie_dev_group,
> > +       NULL,
> > +};
> > +
> >  static struct attribute_group pci_dev_hp_attr_group = {
> >         .attrs = pci_dev_hp_attrs,
> >         .is_visible = pci_dev_hp_attrs_are_visible,
> > @@ -1574,12 +1751,24 @@ static struct attribute_group
> > pci_dev_attr_group = {
> >         .is_visible = pci_dev_attrs_are_visible,
> >  };
> > 
> > +static struct attribute_group pci_bridge_attr_group = {
> > +       .attrs = pci_bridge_attrs,
> > +       .is_visible = pci_bridge_attrs_are_visible,
> > +};
> > +
> > +static struct attribute_group pcie_dev_attr_group = {
> > +       .attrs = pcie_dev_attrs,
> > +       .is_visible = pcie_dev_attrs_are_visible,
> > +};
> > +
> >  static const struct attribute_group *pci_dev_attr_groups[] = {
> >         &pci_dev_attr_group,
> >         &pci_dev_hp_attr_group,
> >  #ifdef CONFIG_PCI_IOV
> >         &sriov_dev_attr_group,
> >  #endif
> > +       &pci_bridge_attr_group,
> > +       &pcie_dev_attr_group,
> >         NULL,
> >  };
> > 
> > diff --git a/include/uapi/linux/pci_regs.h
> > b/include/uapi/linux/pci_regs.h
> > index 634c9c4..b1770dc 100644
> > --- a/include/uapi/linux/pci_regs.h
> > +++ b/include/uapi/linux/pci_regs.h
> > @@ -517,6 +517,10 @@
> >  #define  PCI_EXP_LNKCAP_SLS    0x0000000f /* Supported Link Speeds
> > */
> >  #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector
> > bit 0 */
> >  #define  PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector
> > bit 1 */
> > +#define  PCI_EXP_LNKCAP_MLS    0x0000000f /* Maximum Link Speeds
> > */
> > +#define  PCI_EXP_LNKCAP_MLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector
> > bit 0 */
> > +#define  PCI_EXP_LNKCAP_MLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector
> > bit 1 */
> > +#define  PCI_EXP_LNKCAP_MLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector
> > bit 2 */
> Rather than adding these _MLS_ symbols as duplicates of _SLS_, please
> just add one SLS_8_0GB symbol.
> 
> The _SLS_ names are an artifact of the fact that prior to PCIe r3.0,
> this field was the "Supported Link Speeds" field.  PCIe 3.0 renamed
> it
> to "Max Link Speed" and added the "Supported Link Speeds Vector" in
> the new Link Capabilities 2 register.
> 
> > 
> >  #define  PCI_EXP_LNKCAP_MLW    0x000003f0 /* Maximum Link Width */
> >  #define  PCI_EXP_LNKCAP_ASPMS  0x00000c00 /* ASPM Support */
> >  #define  PCI_EXP_LNKCAP_L0SEL  0x00007000 /* L0s Exit Latency */
> > --
> > 2.7.4
> > 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ