[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100209083236.GE29988@linux.vnet.ibm.com>
Date: Tue, 9 Feb 2010 14:02:36 +0530
From: Arun R Bharadwaj <arun@...ux.vnet.ibm.com>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Ingo Molnar <mingo@...e.hu>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
Dipankar Sarma <dipankar@...ibm.com>,
Balbir Singh <balbir@...ibm.com>,
Venkatesh Pallipadi <venkatesh.pallipadi@...el.com>
Cc: linux-kernel@...r.kernel.org,
Arun Bharadwaj <arun@...ux.vnet.ibm.com>
Subject: [v11 PATCH 4/9]: x86: refactor x86 idle power management code,
remove all instances of pm_idle
* Arun R Bharadwaj <arun@...ux.vnet.ibm.com> [2010-02-09 13:58:16]:
This patch cleans up x86 of all instances of pm_idle.
pm_idle which was earlier called from cpu_idle() idle loop
is replaced by cpuidle_idle_call.
x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.
This is replicated for apm module and for xen, which also used pm_idle.
Signed-off-by: Arun R Bharadwaj <arun@...ux.vnet.ibm.com>
---
arch/x86/kernel/apm_32.c | 45 ++++++++++++++++++++++--
arch/x86/kernel/process.c | 78 +++++++++++++++++++++++++++++++-----------
arch/x86/kernel/process_32.c | 3 +
arch/x86/kernel/process_64.c | 3 +
arch/x86/xen/setup.c | 29 +++++++++++++++
drivers/acpi/processor_core.c | 8 ++--
drivers/acpi/processor_idle.c | 44 ++++++++++-------------
7 files changed, 157 insertions(+), 53 deletions(-)
Index: linux.trees.git/arch/x86/kernel/process.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -14,6 +14,8 @@
#include <linux/utsname.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
+#include <linux/cpuidle.h>
+
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -329,12 +331,6 @@ long sys_execve(char __user *name, char
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
#ifdef CONFIG_X86_32
/*
* This halt magic was a workaround for ancient floppy DMA
@@ -414,17 +410,15 @@ static void do_nothing(void *unused)
}
/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
*
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
*/
void cpu_idle_wait(void)
{
smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
+ /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -603,15 +597,57 @@ static void c1e_idle(void)
default_idle();
}
+static void (*local_idle)(void);
+
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+ if (local_idle)
+ local_idle();
+ else
+ default_idle();
+}
+#endif
+
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+ .name = "cpuidle_default",
+};
+
+static void local_idle_loop(struct cpuidle_device *dev,
+ struct cpuidle_state *st)
+{
+ local_idle();
+}
+
+static int setup_cpuidle_simple(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_default_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = local_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+ return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ if (local_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
}
#endif
- if (pm_idle)
+ if (local_idle)
return;
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
@@ -619,18 +655,20 @@ void __cpuinit select_idle_routine(const
* One CPU supports mwait => All CPUs supports mwait
*/
printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
+ local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO "using C1E aware idle routine\n");
- pm_idle = c1e_idle;
+ local_idle = c1e_idle;
} else
- pm_idle = default_idle;
+ local_idle = default_idle;
+
+ return;
}
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle)
+ if (local_idle == c1e_idle)
zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
@@ -641,7 +679,7 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
- pm_idle = poll_idle;
+ local_idle = poll_idle;
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
else if (!strcmp(str, "halt")) {
@@ -652,7 +690,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- pm_idle = default_idle;
+ local_idle = default_idle;
idle_halt = 1;
return 0;
} else if (!strcmp(str, "nomwait")) {
Index: linux.trees.git/arch/x86/kernel/process_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_32.c
+++ linux.trees.git/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -110,7 +111,7 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
Index: linux.trees.git/arch/x86/kernel/process_64.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_64.c
+++ linux.trees.git/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -137,7 +138,7 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ cpuidle_idle_call();
start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
Index: linux.trees.git/arch/x86/kernel/apm_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/apm_32.c
+++ linux.trees.git/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/cpuidle.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -2255,6 +2256,45 @@ static struct dmi_system_id __initdata a
{ }
};
+DEFINE_PER_CPU(struct cpuidle_device, apm_idle_devices);
+
+struct cpuidle_driver cpuidle_apm_driver = {
+ .name = "cpuidle_apm",
+};
+
+static void apm_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ apm_cpu_idle();
+}
+
+static void setup_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_apm_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = apm_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
+void exit_cpuidle_apm(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(apm_idle_devices, cpu);
+ cpuidle_unregister_device(dev);
+ }
+}
+
+
/*
* Just start the APM thread. We do NOT want to do APM BIOS
* calls from anything but the APM thread, if for no other reason
@@ -2392,8 +2432,7 @@ static int __init apm_init(void)
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
+ setup_cpuidle_apm();
set_pm_idle = 1;
}
@@ -2405,7 +2444,7 @@ static void __exit apm_exit(void)
int error;
if (set_pm_idle) {
- pm_idle = original_pm_idle;
+ exit_cpuidle_apm();
/*
* We are about to unload the current idle thread pm callback
* (pm_idle), Wait for all processors to update cached/local
Index: linux.trees.git/arch/x86/xen/setup.c
===================================================================
--- linux.trees.git.orig/arch/x86/xen/setup.c
+++ linux.trees.git/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
+#include <linux/cpuidle.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -151,6 +152,32 @@ void __cpuinit xen_enable_syscall(void)
#endif /* CONFIG_X86_64 */
}
+DEFINE_PER_CPU(struct cpuidle_device, xen_idle_devices);
+struct cpuidle_driver cpuidle_xen_driver = {
+ .name = "cpuidle_xen",
+};
+
+static void xen_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+ xen_idle();
+}
+
+static void setup_cpuidle_xen(void)
+{
+ struct cpuidle_device *dev;
+ int cpu;
+
+ cpuidle_register_driver(&cpuidle_xen_driver);
+
+ for_each_online_cpu(cpu) {
+ dev = &per_cpu(xen_idle_devices, cpu);
+ dev->cpu = cpu;
+ dev->states[0].enter = xen_idle_loop;
+ dev->state_count = 1;
+ cpuidle_register_device(dev);
+ }
+}
+
void __init xen_arch_setup(void)
{
struct physdev_set_iopl set_iopl;
@@ -186,7 +213,7 @@ void __init xen_arch_setup(void)
MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
- pm_idle = xen_idle;
+ setup_cpuidle_xen();
paravirt_disable_iospace();
Index: linux.trees.git/drivers/acpi/processor_idle.c
===================================================================
--- linux.trees.git.orig/drivers/acpi/processor_idle.c
+++ linux.trees.git/drivers/acpi/processor_idle.c
@@ -814,18 +814,16 @@ static inline void acpi_idle_do_entry(st
*
* This is equivalent to the HALT instruction.
*/
-static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+static void acpi_idle_enter_c1(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- ktime_t kt1, kt2;
- s64 idle_time;
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
local_irq_disable();
@@ -833,20 +831,15 @@ static int acpi_idle_enter_c1(struct cpu
if (acpi_idle_suspend) {
local_irq_enable();
cpu_relax();
- return 0;
+ return;
}
lapic_timer_state_broadcast(pr, cx, 1);
- kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
- kt2 = ktime_get_real();
- idle_time = ktime_to_us(ktime_sub(kt2, kt1));
local_irq_enable();
cx->usage++;
lapic_timer_state_broadcast(pr, cx, 0);
-
- return idle_time;
}
/**
@@ -854,7 +847,7 @@ static int acpi_idle_enter_c1(struct cpu
* @dev: the target CPU
* @state: the state data
*/
-static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+static void acpi_idle_enter_simple(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
@@ -866,10 +859,12 @@ static int acpi_idle_enter_simple(struct
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
local_irq_disable();
current_thread_info()->status &= ~TS_POLLING;
@@ -882,7 +877,7 @@ static int acpi_idle_enter_simple(struct
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
/*
@@ -913,7 +908,6 @@ static int acpi_idle_enter_simple(struct
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
static int c3_cpu_count;
@@ -926,7 +920,7 @@ static DEFINE_SPINLOCK(c3_lock);
*
* If BM is detected, the deepest non-C3 idle state is entered instead.
*/
-static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+static void acpi_idle_enter_bm(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
struct acpi_processor *pr;
@@ -939,20 +933,23 @@ static int acpi_idle_enter_bm(struct cpu
pr = __get_cpu_var(processors);
if (unlikely(!pr))
- return 0;
+ return;
- if (acpi_idle_suspend)
- return(acpi_idle_enter_c1(dev, state));
+ if (acpi_idle_suspend) {
+ acpi_idle_enter_c1(dev, state);
+ return;
+ }
if (acpi_idle_bm_check()) {
if (dev->safe_state) {
dev->last_state = dev->safe_state;
- return dev->safe_state->enter(dev, dev->safe_state);
+ dev->safe_state->enter(dev, dev->safe_state);
+ return;
} else {
local_irq_disable();
acpi_safe_halt();
local_irq_enable();
- return 0;
+ return;
}
}
@@ -967,7 +964,7 @@ static int acpi_idle_enter_bm(struct cpu
if (unlikely(need_resched())) {
current_thread_info()->status |= TS_POLLING;
local_irq_enable();
- return 0;
+ return;
}
acpi_unlazy_tlb(smp_processor_id());
@@ -1025,7 +1022,6 @@ static int acpi_idle_enter_bm(struct cpu
lapic_timer_state_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return idle_time;
}
struct cpuidle_driver acpi_idle_driver = {
Index: linux.trees.git/drivers/acpi/processor_core.c
===================================================================
--- linux.trees.git.orig/drivers/acpi/processor_core.c
+++ linux.trees.git/drivers/acpi/processor_core.c
@@ -1081,9 +1081,11 @@ static int __init acpi_processor_init(vo
if (!acpi_processor_dir)
return -ENOMEM;
#endif
- result = cpuidle_register_driver(&acpi_idle_driver);
- if (result < 0)
- goto out_proc;
+ if (!boot_option_idle_override) {
+ result = cpuidle_register_driver(&acpi_idle_driver);
+ if (result < 0)
+ goto out_proc;
+ }
result = acpi_bus_register_driver(&acpi_processor_driver);
if (result < 0)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists