Paravirtualize the idle loop to explicitly trap to the hypervisor when
blocking, and to use the NO_IDLE_HZ functionality introduced by s390
to inform the rcu subsystem that the CPU is quiescent.

Signed-off-by: Ian Pratt
Signed-off-by: Christian Limpach
Signed-off-by: Chris Wright
Signed-off-by: Jeremy Fitzhardinge

---
 arch/i386/mach-xen/setup-xen.c        |   59 ++++++++++++++++++++++++++++++++++
 drivers/xen/Kconfig                   |    8 ++++
 drivers/xen/core/time.c               |   21 ++++++++++++
 include/asm-i386/mach-xen/mach_time.h |    3 +
 4 files changed, 91 insertions(+)

diff -r 793d8e45fb1e arch/i386/mach-xen/setup-xen.c
--- a/arch/i386/mach-xen/setup-xen.c	Tue Jul 18 03:41:42 2006 -0400
+++ b/arch/i386/mach-xen/setup-xen.c	Tue Jul 18 03:43:53 2006 -0400
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -15,6 +16,9 @@
 #include
 
+#include "mach_time.h"
+
+static void xen_idle(void);
 
 struct start_info *xen_start_info;
 EXPORT_SYMBOL(xen_start_info);
 
@@ -82,4 +86,59 @@ void __init machine_specific_arch_setup(
 		initrd_reserve_bootmem = 0;
 		initrd_below_start_ok = 1;
 	}
+
+
+	pm_idle = xen_idle;
 }
+
+/*
+ * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
+ * These functions are based on implementations from arch/s390/kernel/time.c
+ */
+static void stop_hz_timer(void)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned long j;
+
+	cpu_set(cpu, nohz_cpu_mask);
+
+	/* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
+	/* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
+	/* value of rcp->cur that matches rdp->quiescbatch and allows us to */
+	/* stop the hz timer then the cpumasks created for subsequent values */
+	/* of cur in rcu_start_batch are guaranteed to pick up the updated */
+	/* nohz_cpu_mask and so will not depend on this cpu. */
+
+	smp_mb();
+
+	/* Leave ourselves in 'tick mode' if rcu or softirq or timer pending. */
+	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
+	    (j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
+		cpu_clear(cpu, nohz_cpu_mask);
+		j = jiffies + 1;
+	}
+
+	BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
+}
+
+static void start_hz_timer(void)
+{
+	cpu_clear(smp_processor_id(), nohz_cpu_mask);
+}
+
+static void xen_idle(void)
+{
+	local_irq_disable();
+
+	if (need_resched())
+		local_irq_enable();
+	else {
+		current_thread_info()->status &= ~TS_POLLING;
+		smp_mb__after_clear_bit();
+		stop_hz_timer();
+		/* Blocking includes an implicit local_irq_enable(). */
+		HYPERVISOR_sched_op(SCHEDOP_block, 0);
+		start_hz_timer();
+		current_thread_info()->status |= TS_POLLING;
+	}
+}
diff -r 793d8e45fb1e drivers/xen/Kconfig
--- a/drivers/xen/Kconfig	Tue Jul 18 03:41:42 2006 -0400
+++ b/drivers/xen/Kconfig	Tue Jul 18 03:43:53 2006 -0400
@@ -12,6 +12,14 @@ config XEN
 
 if XEN
 
+config NO_IDLE_HZ
+	bool
+	default y
+	help
+	  Switches the regular HZ timer off when the system is going idle.
+	  This helps Xen to detect that the Linux system is idle, reducing
+	  the overhead of idle systems.
+
 config XEN_SHADOW_MODE
 	bool
 	default y
diff -r 793d8e45fb1e drivers/xen/core/time.c
--- a/drivers/xen/core/time.c	Tue Jul 18 03:41:42 2006 -0400
+++ b/drivers/xen/core/time.c	Tue Jul 18 03:43:53 2006 -0400
@@ -227,6 +227,27 @@ void do_timer_interrupt_hook(struct pt_r
 
 	update_process_times(user_mode_vm(regs));
 }
+
+/* Convert jiffies to Xen system time. */
+u64 jiffies_to_st(unsigned long j)
+{
+	unsigned long seq;
+	long delta;
+	u64 st;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		delta = j - jiffies;
+		/* NB. The next check can trigger in some wrap-around cases,
+		 * but that's ok: we'll just end up with a shorter timeout. */
+		if (delta < 1)
+			delta = 1;
+		st = processed_system_time + (delta * (u64)NS_PER_TICK);
+	} while (read_seqretry(&xtime_lock, seq));
+
+	return st;
+}
+
 static cycle_t xen_clocksource_read(void)
 {
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, smp_processor_id());
diff -r 793d8e45fb1e include/asm-i386/mach-xen/mach_time.h
--- a/include/asm-i386/mach-xen/mach_time.h	Tue Jul 18 03:41:42 2006 -0400
+++ b/include/asm-i386/mach-xen/mach_time.h	Tue Jul 18 03:43:53 2006 -0400
@@ -13,4 +13,7 @@ int mach_set_rtc_mmss(unsigned long nowt
 int mach_set_rtc_mmss(unsigned long nowtime);
 unsigned long mach_get_cmos_time(void);
 
+/* Convert jiffies to Xen system time. */
+u64 jiffies_to_st(unsigned long j);
+
 #endif /* !_MACH_TIME_H */

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/