[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1445267044-29551-2-git-send-email-prarit@redhat.com>
Date:	Mon, 19 Oct 2015 11:04:03 -0400
From:	Prarit Bhargava <prarit@...hat.com>
To:	linux-kernel@...r.kernel.org
Cc:	Prarit Bhargava <prarit@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	"H. Peter Anvin" <hpa@...or.com>, x86@...nel.org,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Borislav Petkov <bp@...en8.de>,
	Len Brown <len.brown@...el.com>,
	Andy Lutomirski <luto@...capital.net>,
	Zhu Guihua <zhugh.fnst@...fujitsu.com>,
	Denys Vlasenko <dvlasenk@...hat.com>,
	Jan H. Schönherr <jschoenh@...zon.de>,
	Boris Ostrovsky <boris.ostrovsky@...cle.com>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Thomas Renninger <trenn@...e.de>
Subject: [PATCH 1/2] cpu hotplug, add CONFIG_PERMANENT_CPU_TOPOLOGY and keep topology directory for lifetime of CPU
The information in /sys/devices/system/cpu/cpuX/topology
directory is useful for userspace monitoring applications and in-tree
utilities like cpupower & turbostat.
When down'ing a CPU the /sys/devices/system/cpu/cpuX/topology directory is
removed during the CPU_DEAD hotplug callback in the kernel.  The problem
with this model is that the CPU has not been physically removed and the
data in the topology directory is still valid.
This patch adds CONFIG_PERMANENT_CPU_TOPOLOGY, which is Y by default for
x86 and N for all other arches.  When enabled the kernel is modified so
that the topology directory is added to the core cpu sysfs files so that
the topology directory exists for the lifetime of the CPU.  When
disabled, the behavior of the current kernel is maintained (that is, the
topology directory is removed on a down and added on an up).  Adding
CONFIG_PERMANENT_CPU_TOPOLOGY support to an arch may require additional
architecture work so that the cpumask data in the CPU's topology is not
cleared during a CPU down.
This patch combines drivers/base/topology.c and drivers/base/cpu.c to
implement CONFIG_PERMANENT_CPU_TOPOLOGY, and leaves all arches except
x86 with the current behavior.
Before patch:
[root@...rit cpu143]# ls
cache    crash_notes       firmware_node  online     thermal_throttle
cpufreq  crash_notes_size  microcode      power      topology
cpuidle  driver            node3          subsystem  uevent
Down a cpu
[root@...rit cpu143]# echo 0 > online
[root@...el-brickland-05 cpu143]# ls
cpuidle      crash_notes_size  firmware_node  online  subsystem
crash_notes  driver            node3          power   uevent
[root@...el-brickland-05 cpu143]# ls -l topology
ls: cannot access topology: No such file or directory
After patch:
[root@...rit cpu143]# ls
cache    crash_notes       firmware_node  online     thermal_throttle
cpufreq  crash_notes_size  microcode      power      topology
cpuidle  driver            node3          subsystem  uevent
[root@...rit cpu143]# cat topology/*
27
ffff,c0000000,000000ff,ffc00000,00000000
54-71,126-143
3
8000,00000000,00000080,00000000,00000000
71,143
Down a cpu
[root@...rit cpu143]# echo 0 > online
[root@...el-brickland-05 cpu143]# ls
cpuidle      crash_notes_size  firmware_node  online  subsystem  uevent
crash_notes  driver	       node3	      power   topology
[root@...rit cpu143]# cat topology/*
27
ffff,c0000000,000000ff,ffc00000,00000000
54-71,126-143
3
8000,00000000,00000080,00000000,00000000
71,143
I did some light testing with and without BOOTPARAM_HOTPLUG_CPU0 enabled,
and up'd and down'd CPUs in sequence, randomly, by thread group, by
socket group and didn't see any issues.
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: x86@...nel.org
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Len Brown <len.brown@...el.com>
Cc: Andy Lutomirski <luto@...capital.net>
Cc: Zhu Guihua <zhugh.fnst@...fujitsu.com>
Cc: Denys Vlasenko <dvlasenk@...hat.com>
Cc: "Jan H. Schönherr" <jschoenh@...zon.de>
Cc: Boris Ostrovsky <boris.ostrovsky@...cle.com>
Cc: Prarit Bhargava <prarit@...hat.com>
Cc: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc: Thomas Renninger <trenn@...e.de>
Signed-off-by: Prarit Bhargava <prarit@...hat.com>
---
 arch/x86/kernel/smpboot.c |   28 --------
 drivers/base/Kconfig      |   13 ++++
 drivers/base/Makefile     |    2 +-
 drivers/base/cpu.c        |  135 +++++++++++++++++++++++++++++++++++++++
 drivers/base/topology.c   |  155 ---------------------------------------------
 5 files changed, 149 insertions(+), 184 deletions(-)
 delete mode 100644 drivers/base/topology.c
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e..19082c7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1322,32 +1322,6 @@ __init void prefill_possible_map(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static void remove_siblinginfo(int cpu)
-{
-	int sibling;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
-		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
-		/*/
-		 * last thread sibling in this cpu core going down
-		 */
-		if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
-			cpu_data(sibling).booted_cores--;
-	}
-
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
-	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
-		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
-	cpumask_clear(cpu_llc_shared_mask(cpu));
-	cpumask_clear(topology_sibling_cpumask(cpu));
-	cpumask_clear(topology_core_cpumask(cpu));
-	c->phys_proc_id = 0;
-	c->cpu_core_id = 0;
-	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
-}
-
 static void remove_cpu_from_maps(int cpu)
 {
 	set_cpu_online(cpu, false);
@@ -1362,8 +1336,6 @@ void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
 
-	remove_siblinginfo(cpu);
-
 	/* It's now safe to remove this processor from the online map */
 	lock_vector_lock();
 	remove_cpu_from_maps(cpu);
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..321d261 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -324,4 +324,17 @@ config CMA_ALIGNMENT
 
 endif
 
+config PERMANENT_CPU_TOPOLOGY
+	bool "Permanent CPU Topology"
+	depends on HOTPLUG_CPU
+	default 1 if X86
+	default 0
+	help
+	  This option configures CPU topology to be permanent for the lifetime
+	  of the CPU (until it is physically removed).  Selecting Y here
+	  results in the kernel reporting the physical location for offlined
+	  CPUs.
+
+	  If unsure, leave the default value as is.
+
 endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 6b2a84e..567ab7c 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -4,7 +4,7 @@ obj-y			:= component.o core.o bus.o dd.o syscore.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o devres.o \
 			   attribute_container.o transport_class.o \
-			   topology.o container.o property.o cacheinfo.o
+			   container.o property.o cacheinfo.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
 obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 91bbb19..9c30782 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -17,6 +17,8 @@
 #include <linux/of.h>
 #include <linux/cpufeature.h>
 #include <linux/tick.h>
+#include <linux/mm.h>
+#include <linux/hardirq.h>
 
 #include "base.h"
 
@@ -175,15 +177,145 @@ static struct attribute *crash_note_cpu_attrs[] = {
 	NULL
 };
 
+#define define_id_show_func(name)				\
+static ssize_t name##_show(struct device *dev,			\
+		struct device_attribute *attr, char *buf)	\
+{								\
+	return sprintf(buf, "%d\n", topology_##name(dev->id));	\
+}
+
+#define define_siblings_show_map(name, mask)				\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
+}
+
+#define define_siblings_show_list(name, mask)				\
+static ssize_t name##_list_show(struct device *dev,			\
+				struct device_attribute *attr,		\
+				char *buf)				\
+{									\
+	return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
+}
+
+#define define_siblings_show_func(name, mask)	\
+	define_siblings_show_map(name, mask);	\
+	define_siblings_show_list(name, mask)
+
+define_id_show_func(physical_package_id);
+static DEVICE_ATTR_RO(physical_package_id);
+
+define_id_show_func(core_id);
+static DEVICE_ATTR_RO(core_id);
+
+define_siblings_show_func(thread_siblings, sibling_cpumask);
+static DEVICE_ATTR_RO(thread_siblings);
+static DEVICE_ATTR_RO(thread_siblings_list);
+
+define_siblings_show_func(core_siblings, core_cpumask);
+static DEVICE_ATTR_RO(core_siblings);
+static DEVICE_ATTR_RO(core_siblings_list);
+
+#ifdef CONFIG_SCHED_BOOK
+define_id_show_func(book_id);
+static DEVICE_ATTR_RO(book_id);
+define_siblings_show_func(book_siblings, book_cpumask);
+static DEVICE_ATTR_RO(book_siblings);
+static DEVICE_ATTR_RO(book_siblings_list);
+#endif
+
+static struct attribute *topology_attrs[] = {
+	&dev_attr_physical_package_id.attr,
+	&dev_attr_core_id.attr,
+	&dev_attr_thread_siblings.attr,
+	&dev_attr_thread_siblings_list.attr,
+	&dev_attr_core_siblings.attr,
+	&dev_attr_core_siblings_list.attr,
+#ifdef CONFIG_SCHED_BOOK
+	&dev_attr_book_id.attr,
+	&dev_attr_book_siblings.attr,
+	&dev_attr_book_siblings_list.attr,
+#endif
+	NULL
+};
+
 static struct attribute_group crash_note_cpu_attr_group = {
 	.attrs = crash_note_cpu_attrs,
 };
 #endif
 
+static struct attribute_group topology_attr_group = {
+	.attrs = topology_attrs,
+	.name = "topology"
+};
+
+#ifndef CONFIG_PERMANENT_CPU_TOPOLOGY
+/* Add/Remove cpu_topology interface for CPU device */
+static int topology_add_dev(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	return sysfs_create_group(&dev->kobj, &topology_attr_group);
+}
+
+static void topology_remove_dev(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	sysfs_remove_group(&dev->kobj, &topology_attr_group);
+}
+
+static int topology_cpu_callback(struct notifier_block *nfb,
+				 unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	int rc = 0;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		rc = topology_add_dev(cpu);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		topology_remove_dev(cpu);
+		break;
+	}
+	return notifier_from_errno(rc);
+}
+
+static int topology_sysfs_init(void)
+{
+	int cpu;
+	int rc = 0;
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu) {
+		rc = topology_add_dev(cpu);
+		if (rc)
+			goto out;
+	}
+	__hotcpu_notifier(topology_cpu_callback, 0);
+
+out:
+	cpu_notifier_register_done();
+	return rc;
+}
+
+device_initcall(topology_sysfs_init);
+#endif
+
 static const struct attribute_group *common_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_PERMANENT_CPU_TOPOLOGY
+	&topology_attr_group,
+#endif
 	NULL
 };
 
@@ -191,6 +323,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_PERMANENT_CPU_TOPOLOGY
+	&topology_attr_group,
+#endif
 	NULL
 };
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
deleted file mode 100644
index 8b7d7f8..0000000
--- a/drivers/base/topology.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * driver/base/topology.c - Populate sysfs with cpu topology information
- *
- * Written by: Zhang Yanmin, Intel Corporation
- *
- * Copyright (C) 2006, Intel Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/topology.h>
-
-#define define_id_show_func(name)				\
-static ssize_t name##_show(struct device *dev,			\
-		struct device_attribute *attr, char *buf)	\
-{								\
-	return sprintf(buf, "%d\n", topology_##name(dev->id));	\
-}
-
-#define define_siblings_show_map(name, mask)				\
-static ssize_t name##_show(struct device *dev,				\
-			   struct device_attribute *attr, char *buf)	\
-{									\
-	return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
-}
-
-#define define_siblings_show_list(name, mask)				\
-static ssize_t name##_list_show(struct device *dev,			\
-				struct device_attribute *attr,		\
-				char *buf)				\
-{									\
-	return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
-}
-
-#define define_siblings_show_func(name, mask)	\
-	define_siblings_show_map(name, mask);	\
-	define_siblings_show_list(name, mask)
-
-define_id_show_func(physical_package_id);
-static DEVICE_ATTR_RO(physical_package_id);
-
-define_id_show_func(core_id);
-static DEVICE_ATTR_RO(core_id);
-
-define_siblings_show_func(thread_siblings, sibling_cpumask);
-static DEVICE_ATTR_RO(thread_siblings);
-static DEVICE_ATTR_RO(thread_siblings_list);
-
-define_siblings_show_func(core_siblings, core_cpumask);
-static DEVICE_ATTR_RO(core_siblings);
-static DEVICE_ATTR_RO(core_siblings_list);
-
-#ifdef CONFIG_SCHED_BOOK
-define_id_show_func(book_id);
-static DEVICE_ATTR_RO(book_id);
-define_siblings_show_func(book_siblings, book_cpumask);
-static DEVICE_ATTR_RO(book_siblings);
-static DEVICE_ATTR_RO(book_siblings_list);
-#endif
-
-static struct attribute *default_attrs[] = {
-	&dev_attr_physical_package_id.attr,
-	&dev_attr_core_id.attr,
-	&dev_attr_thread_siblings.attr,
-	&dev_attr_thread_siblings_list.attr,
-	&dev_attr_core_siblings.attr,
-	&dev_attr_core_siblings_list.attr,
-#ifdef CONFIG_SCHED_BOOK
-	&dev_attr_book_id.attr,
-	&dev_attr_book_siblings.attr,
-	&dev_attr_book_siblings_list.attr,
-#endif
-	NULL
-};
-
-static struct attribute_group topology_attr_group = {
-	.attrs = default_attrs,
-	.name = "topology"
-};
-
-/* Add/Remove cpu_topology interface for CPU device */
-static int topology_add_dev(unsigned int cpu)
-{
-	struct device *dev = get_cpu_device(cpu);
-
-	return sysfs_create_group(&dev->kobj, &topology_attr_group);
-}
-
-static void topology_remove_dev(unsigned int cpu)
-{
-	struct device *dev = get_cpu_device(cpu);
-
-	sysfs_remove_group(&dev->kobj, &topology_attr_group);
-}
-
-static int topology_cpu_callback(struct notifier_block *nfb,
-				 unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	int rc = 0;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		rc = topology_add_dev(cpu);
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		topology_remove_dev(cpu);
-		break;
-	}
-	return notifier_from_errno(rc);
-}
-
-static int topology_sysfs_init(void)
-{
-	int cpu;
-	int rc = 0;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu) {
-		rc = topology_add_dev(cpu);
-		if (rc)
-			goto out;
-	}
-	__hotcpu_notifier(topology_cpu_callback, 0);
-
-out:
-	cpu_notifier_register_done();
-	return rc;
-}
-
-device_initcall(topology_sysfs_init);
-- 
1.7.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Powered by blists - more mailing lists
 
