lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181207182021.16344-1-mr.nuke.me@gmail.com>
Date:   Fri,  7 Dec 2018 12:20:00 -0600
From:   Alexandru Gagniuc <mr.nuke.me@...il.com>
To:     helgaas@...nel.org
Cc:     alex_gagniuc@...lteam.com, Austin.Bolen@...l.com,
        Shyam.Iyer@...l.com, Alexandru Gagniuc <mr.nuke.me@...il.com>,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        "Rafael J. Wysocki" <rafael.j.wysocki@...el.com>,
        Keith Busch <keith.busch@...el.com>,
        Oza Pawandeep <poza@...eaurora.org>,
        Mika Westerberg <mika.westerberg@...ux.intel.com>,
        Frederick Lawler <fred@...dlawl.com>,
        Lukas Wunner <lukas@...ner.de>, linux-kernel@...r.kernel.org,
        linux-pci@...r.kernel.org
Subject: [PATCH v2] PCI: pciehp: Report degraded links via link bandwidth notification

A warning is generated when a PCIe device is probed with a degraded
link, but there was no similar mechanism to warn when the link becomes
degraded after probing. The Link Bandwidth Notification provides this
mechanism.

Use the link bandwidth notification interrupt to detect bandwidth
changes, and rescan the bandwidth, looking for the weakest point. This
is the same logic used in probe().

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@...il.com>
---
Changes since v1:
 * Layer on top of the pcie port service drivers, instead of hotplug service.

This patch needs to be applied on top of:
	PCI: Add missing include to drivers/pci.h

Anticipated FAQ:

Q: Why is this unconditionally compiled in?
A: The symmetrical check in pci probe() is also always compiled in.

Q: Why call module_init() instead of adding a call in pcie_init_services() ?
A: A call in pcie_init_services() also requires a prototype in portdrv.h, a
non-static implementation in bw_notification.c. Using module_init() is
functionally equivalent, and takes less code.

Q: Why print only on degraded links and not when a link is back to full speed?
For symmetry with PCI probe(). Although I see a benefit in conveying that a
link is back to full speed, I expect this to be extremely rare. Secondary bus
reset is usually needed to retrain at full bandwidth.


 drivers/pci/pcie/Makefile          |   1 +
 drivers/pci/pcie/bw_notification.c | 107 +++++++++++++++++++++++++++++
 drivers/pci/pcie/portdrv.h         |   4 +-
 drivers/pci/pcie/portdrv_core.c    |  14 ++--
 4 files changed, 121 insertions(+), 5 deletions(-)
 create mode 100644 drivers/pci/pcie/bw_notification.c

diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index ab514083d5d4..f1d7bc1e5efa 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -3,6 +3,7 @@
 # Makefile for PCI Express features and port driver
 
 pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o
+pcieportdrv-y			+= bw_notification.o
 
 obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
 
diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c
new file mode 100644
index 000000000000..64391ea9a172
--- /dev/null
+++ b/drivers/pci/pcie/bw_notification.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCI Express Bandwidth notification services driver
+ * Author: Alexandru Gagniuc <mr.nuke.me@...il.com>
+ *
+ * Copyright (C) 2018, Dell Inc
+ *
+ * The PCIe bandwidth notification provides a way to notify the operating system
+ * when the link width or data rate changes. This capability is required for all
+ * root ports and downstream ports supporting links wider than x1 and/or
+ * multiple link speeds.
+ *
+ * This service port driver hooks into the bandwidth notification interrupt and
+ * warns when links become degraded in operation.
+ */
+
+#include <linux/module.h>
+
+#include "../pci.h"
+#include "portdrv.h"
+
+static bool pcie_link_bandwidth_notification_supported(struct pci_dev *dev)
+{
+	int ret;
+	u32 lnk_cap;
+
+	ret = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnk_cap);
+	return (ret == PCIBIOS_SUCCESSFUL) && (lnk_cap & PCI_EXP_LNKCAP_LBNC);
+}
+
+static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev)
+{
+	u16 lnk_ctl;
+
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl);
+	lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
+}
+
+static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev)
+{
+	u16 lnk_ctl;
+
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl);
+	lnk_ctl &= ~PCI_EXP_LNKCTL_LBMIE;
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
+}
+
+static irqreturn_t pcie_bw_notification_irq(int irq, void *context)
+{
+	struct pcie_device *srv = context;
+	struct pci_dev *port = srv->port;
+	struct pci_dev *dev;
+	u16 link_status, events;
+	int ret;
+
+	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
+	events = link_status & PCI_EXP_LNKSTA_LBMS;
+
+	if (!events || ret != PCIBIOS_SUCCESSFUL)
+		return IRQ_NONE;
+
+	/* Print status from upstream link partner, not this downstream port. */
+	list_for_each_entry(dev, &port->subordinate->devices, bus_list)
+		__pcie_print_link_status(dev, false);
+
+	pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
+	return IRQ_HANDLED;
+}
+
+static int pcie_bandwidth_notification_probe(struct pcie_device *srv)
+{
+	int ret;
+
+	/* Single-width or single-speed ports do not have to support this. */
+	if (!pcie_link_bandwidth_notification_supported(srv->port))
+		return -ENODEV;
+
+	ret = devm_request_irq(&srv->device, srv->irq, pcie_bw_notification_irq,
+			       IRQF_SHARED, "PCIe BW notif", srv);
+	if (ret)
+		return ret;
+
+	pcie_enable_link_bandwidth_notification(srv->port);
+
+	return 0;
+}
+
+static void pcie_bandwidth_notification_remove(struct pcie_device *srv)
+{
+	pcie_disable_link_bandwidth_notification(srv->port);
+}
+
+static struct pcie_port_service_driver pcie_bandwidth_notification_driver = {
+	.name		= "pcie_bw_notification",
+	.port_type	= PCI_EXP_TYPE_DOWNSTREAM,
+	.service	= PCIE_PORT_SERVICE_BwNOTIF,
+	.probe		= pcie_bandwidth_notification_probe,
+	.remove		= pcie_bandwidth_notification_remove,
+};
+
+static int __init pcie_bandwidth_notification_service_init(void)
+{
+	return pcie_port_service_register(&pcie_bandwidth_notification_driver);
+}
+
+module_init(pcie_bandwidth_notification_service_init);
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index e495f04394d0..46652469ffaa 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -20,8 +20,10 @@
 #define PCIE_PORT_SERVICE_HP		(1 << PCIE_PORT_SERVICE_HP_SHIFT)
 #define PCIE_PORT_SERVICE_DPC_SHIFT	3	/* Downstream Port Containment */
 #define PCIE_PORT_SERVICE_DPC		(1 << PCIE_PORT_SERVICE_DPC_SHIFT)
+#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT	4	/* Bandwidth notification */
+#define PCIE_PORT_SERVICE_BwNOTIF	(1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT)
 
-#define PCIE_PORT_DEVICE_MAXSERVICES   4
+#define PCIE_PORT_DEVICE_MAXSERVICES   5
 
 #ifdef CONFIG_PCIEAER
 int pcie_aer_init(void);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index f458ac9cb70c..86455ff7ced9 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -99,7 +99,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask,
  */
 static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask)
 {
-	int nr_entries, nvec;
+	int nr_entries, nvec, pcie_irq;
 	u32 pme = 0, aer = 0, dpc = 0;
 
 	/* Allocate the maximum possible number of MSI/MSI-X vectors */
@@ -136,9 +136,12 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask)
 	}
 
 	/* PME and hotplug share an MSI/MSI-X vector */
-	if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
-		irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pci_irq_vector(dev, pme);
-		irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pci_irq_vector(dev, pme);
+	if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
+		    PCIE_PORT_SERVICE_BwNOTIF)) {
+		pcie_irq = pci_irq_vector(dev, pme);
+		irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq;
+		irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq;
+		irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq;
 	}
 
 	if (mask & PCIE_PORT_SERVICE_AER)
@@ -250,6 +253,9 @@ static int get_port_device_capability(struct pci_dev *dev)
 	    pci_aer_available() && services & PCIE_PORT_SERVICE_AER)
 		services |= PCIE_PORT_SERVICE_DPC;
 
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
+		services |= PCIE_PORT_SERVICE_BwNOTIF;
+
 	return services;
 }
 
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ