lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1435780900-23889-1-git-send-email-izumi.taku@jp.fujitsu.com>
Date:	Thu,  2 Jul 2015 05:01:40 +0900
From:	Taku Izumi <izumi.taku@...fujitsu.com>
To:	linux-kernel@...r.kernel.org, a.p.zijlstra@...llo.nl,
	mingo@...hat.com, acme@...nel.org
Cc:	Taku Izumi <izumi.taku@...fujitsu.com>
Subject: [PATCH] perf: Fix multi-segment problem of perf_event_intel_uncore

In multi-segment system, uncore devices may belong to buses whose segment
number is other than 0.

  ....
  0000:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:7f:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:bf:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03
  ...

In that case relation of bus number and physical id may be broken
because "uncore_pcibus_to_physid" doesn't take account of PCI segment.
For example, bus 0000:ff and 0001:ff uses the same entry of
"uncore_pcibus_to_physid" array.

This patch fixes ths problem by introducing segment-aware pci2phy_map instead.

Signed-off-by: Taku Izumi <izumi.taku@...fujitsu.com>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c      | 27 +++++++++++---
 arch/x86/kernel/cpu/perf_event_intel_uncore.h      |  9 ++++-
 arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c  | 23 +++++++++++-
 .../x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 41 ++++++++++++++++++----
 4 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 21b5e38..78c8686 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
 static bool pcidrv_registered;
 struct pci_driver *uncore_pci_driver;
 /* pci bus to socket mapping */
-int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 
 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
@@ -806,10 +807,18 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
 	struct intel_uncore_type *type;
-	int phys_id;
+	int phys_id = -1;
 	bool first_box = false;
+	struct pci2phy_map *map;
 
-	phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+	raw_spin_lock(&pci2phy_map_lock);
+	list_for_each_entry(map, &pci2phy_map_head, list) {
+		if (map->segment == pci_domain_nr(pdev->bus)) {
+			phys_id = map->pbus_to_physid[pdev->bus->number];
+			break;
+		}
+	}
+	raw_spin_unlock(&pci2phy_map_lock);
 	if (phys_id < 0)
 		return -ENODEV;
 
@@ -856,8 +865,18 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
 	struct intel_uncore_pmu *pmu;
-	int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+	int i, cpu, phys_id = -1;
 	bool last_box = false;
+	struct pci2phy_map *map;
+
+	raw_spin_lock(&pci2phy_map_lock);
+	list_for_each_entry(map, &pci2phy_map_head, list) {
+		if (map->segment == pci_domain_nr(pdev->bus)) {
+			phys_id = map->pbus_to_physid[pdev->bus->number];
+			break;
+		}
+	}
+	raw_spin_unlock(&pci2phy_map_lock);
 
 	box = pci_get_drvdata(pdev);
 	if (!box) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 0f77f0a..0fb2a23 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -117,6 +117,12 @@ struct uncore_event_desc {
 	const char *config;
 };
 
+struct pci2phy_map {
+	struct list_head list;
+	int segment;
+	int pbus_to_physid[256];
+};
+
 ssize_t uncore_event_show(struct kobject *kobj,
 			  struct kobj_attribute *attr, char *buf);
 
@@ -317,7 +323,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
 extern struct intel_uncore_type **uncore_msr_uncores;
 extern struct intel_uncore_type **uncore_pci_uncores;
 extern struct pci_driver *uncore_pci_driver;
-extern int uncore_pcibus_to_physid[256];
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
 extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 extern struct event_constraint uncore_constraint_empty;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index b005a78..ccbc817 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -402,14 +402,35 @@ static int snb_pci2phy_map_init(int devid)
 {
 	struct pci_dev *dev = NULL;
 	int bus;
+	int segment;
+	struct pci2phy_map *map;
+	bool found = false;
 
 	dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
 	if (!dev)
 		return -ENOTTY;
 
 	bus = dev->bus->number;
+	segment = pci_domain_nr(dev->bus);
 
-	uncore_pcibus_to_physid[bus] = 0;
+	raw_spin_lock(&pci2phy_map_lock);
+	list_for_each_entry(map, &pci2phy_map_head, list) {
+		if (map->segment == segment) {
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+		if (map) {
+			map->segment = segment;
+			map->pbus_to_physid[bus] = 0;
+			list_add_tail(&map->list, &pci2phy_map_head);
+		}
+	} else {
+		map->pbus_to_physid[bus] = 0;
+	}
+	raw_spin_unlock(&pci2phy_map_lock);
 
 	pci_dev_put(dev);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 6d6e85d..85ae74b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1090,6 +1090,9 @@ static int snbep_pci2phy_map_init(int devid)
 	int i, bus, nodeid;
 	int err = 0;
 	u32 config = 0;
+	struct pci2phy_map *map;
+	int segment;
+	bool found = false;
 
 	while (1) {
 		/* find the UBOX device */
@@ -1106,13 +1109,33 @@ static int snbep_pci2phy_map_init(int devid)
 		err = pci_read_config_dword(ubox_dev, 0x54, &config);
 		if (err)
 			break;
+
+		segment = pci_domain_nr(ubox_dev->bus);
+		raw_spin_lock(&pci2phy_map_lock);
+		list_for_each_entry(map, &pci2phy_map_head, list) {
+			if (map->segment == segment) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+			if (!map) {
+				err = -ENOMEM;
+				break;
+			}
+			map->segment = segment;
+			list_add_tail(&map->list, &pci2phy_map_head);
+		}
+		raw_spin_unlock(&pci2phy_map_lock);
+
 		/*
 		 * every three bits in the Node ID mapping register maps
 		 * to a particular node.
 		 */
 		for (i = 0; i < 8; i++) {
 			if (nodeid == ((config >> (3 * i)) & 0x7)) {
-				uncore_pcibus_to_physid[bus] = i;
+				map->pbus_to_physid[bus] = i;
 				break;
 			}
 		}
@@ -1123,13 +1146,17 @@ static int snbep_pci2phy_map_init(int devid)
 		 * For PCI bus with no UBOX device, find the next bus
 		 * that has UBOX device and use its mapping.
 		 */
-		i = -1;
-		for (bus = 255; bus >= 0; bus--) {
-			if (uncore_pcibus_to_physid[bus] >= 0)
-				i = uncore_pcibus_to_physid[bus];
-			else
-				uncore_pcibus_to_physid[bus] = i;
+		raw_spin_lock(&pci2phy_map_lock);
+		list_for_each_entry(map, &pci2phy_map_head, list) {
+			i = -1;
+			for (bus = 255; bus >= 0; bus--) {
+				if (map->pbus_to_physid[bus] >= 0)
+					i = map->pbus_to_physid[bus];
+				else
+					map->pbus_to_physid[bus] = i;
+			}
 		}
+		raw_spin_unlock(&pci2phy_map_lock);
 	}
 
 	pci_dev_put(ubox_dev);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ