commit bdf333e0ebb4bd33ea20ee1825314381681208b0
Author: Peter Zijlstra
Date:   Thu Feb 13 15:50:16 2014 +0100

    peterz's IPI patch, rebased by Andy to -linus

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5..d6a7b7c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -42,18 +42,15 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
  */
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	if (!current_set_polling_and_test()) {
-		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
-			mb();
-			clflush((void *)&current_thread_info()->flags);
-			mb();
-		}
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		if (!need_resched())
-			__mwait(eax, ecx);
+	if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+		mb();
+		clflush((void *)&current_thread_info()->flags);
+		mb();
 	}
-	current_clr_polling();
+
+	__monitor((void *)&current_thread_info()->flags, 0, 0);
+	if (!need_resched())
+		__mwait(eax, ecx);
 }
 
 #endif /* _ASM_X86_MWAIT_H */
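With the polling-flag bookkeeping moved into the generic idle loop (see the
kernel/cpu/idle.c hunks below), mwait_idle_with_hints() is reduced to the
race-free wait sequence: arm the monitor on the current thread's flags word,
re-check need_resched() in case the flag was set between the caller's check
and MONITOR, then MWAIT. A remote CPU that sets TIF_NEED_RESCHED writes to
the monitored cacheline and terminates the MWAIT without an IPI. A minimal
userspace analogue of the arm/re-check/wait pattern, with a C11 atomic
standing in for the hardware monitor (all names are illustrative, none of
this is kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define _TIF_NEED_RESCHED	(1UL << 3)

static _Atomic unsigned long ti_flags;

static void mwait_idle_sketch(void)
{
	/* "__monitor": remember the word we are waiting on */
	unsigned long armed = atomic_load(&ti_flags);

	/* the re-check closes the race against a setter that fired first */
	if (!(armed & _TIF_NEED_RESCHED))
		/* "__mwait": wait for the monitored word to change */
		while (atomic_load(&ti_flags) == armed)
			;
}

int main(void)
{
	atomic_fetch_or(&ti_flags, _TIF_NEED_RESCHED);	/* remote waker */
	mwait_idle_sketch();	/* returns at once: the re-check fires */
	puts("woken by a write to the monitored word");
	return 0;
}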
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e1940c0..1f2fe57 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
+#define TIF_POLLING_NRFLAG	21
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
@@ -111,6 +112,7 @@ struct thread_info {
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
@@ -234,8 +236,6 @@ static inline struct thread_info *current_thread_info(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
-#define TS_POLLING		0x0004	/* idle task polling need_resched,
-					   skip sending interrupt */
 #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3fb8d95..4939eb7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -309,9 +309,11 @@ void arch_cpu_idle(void)
  */
 void default_idle(void)
 {
-	trace_cpu_idle_rcuidle(1, smp_processor_id());
-	safe_halt();
-	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	if (!current_clr_polling_and_test()) {
+		trace_cpu_idle_rcuidle(1, smp_processor_id());
+		safe_halt();
+		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	}
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a781dec..cae2249 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2667,85 +2667,26 @@ static inline int spin_needbreak(spinlock_t *lock)
  * thread_info.status and one based on TIF_POLLING_NRFLAG in
  * thread_info.flags
  */
-#ifdef TS_POLLING
-static inline int tsk_is_polling(struct task_struct *p)
-{
-	return task_thread_info(p)->status & TS_POLLING;
-}
-static inline void __current_set_polling(void)
-{
-	current_thread_info()->status |= TS_POLLING;
-}
-
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	__current_set_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 */
-	smp_mb();
-
-	return unlikely(tif_need_resched());
-}
-
-static inline void __current_clr_polling(void)
-{
-	current_thread_info()->status &= ~TS_POLLING;
-}
-
-static inline bool __must_check current_clr_polling_and_test(void)
-{
-	__current_clr_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 */
-	smp_mb();
-
-	return unlikely(tif_need_resched());
-}
-#elif defined(TIF_POLLING_NRFLAG)
+#ifdef CONFIG_SMP
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
 
-static inline void __current_set_polling(void)
+static inline void current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	__current_set_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 *
-	 * XXX: assumes set/clear bit are identical barrier wise.
-	 */
-	smp_mb__after_clear_bit();
-
-	return unlikely(tif_need_resched());
-}
-
-static inline void __current_clr_polling(void)
+static inline void current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
 
 static inline bool __must_check current_clr_polling_and_test(void)
 {
-	__current_clr_polling();
+	current_clr_polling();
 
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 */
 	smp_mb__after_clear_bit();
 
 	return unlikely(tif_need_resched());
@@ -2753,34 +2694,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void __current_set_polling(void) { }
-static inline void __current_clr_polling(void) { }
+static inline void current_set_polling(void) { }
+static inline void current_clr_polling(void) { }
 
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	return unlikely(tif_need_resched());
-}
 static inline bool __must_check current_clr_polling_and_test(void)
 {
 	return unlikely(tif_need_resched());
 }
 #endif
 
-static inline void current_clr_polling(void)
-{
-	__current_clr_polling();
-
-	/*
-	 * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
-	 * Once the bit is cleared, we'll get IPIs with every new
-	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
-	 * fold.
-	 */
-	smp_mb(); /* paired with resched_task() */
-
-	preempt_fold_need_resched();
-}
-
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
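The barrier comments removed above still describe a real constraint:
clear-polling-then-test-NEED_RESCHED on the idle side and
set-NEED_RESCHED-then-test-polling on the waker side are the two halves of a
store-buffering pattern. If either side's full barrier is dropped
(smp_mb__after_clear_bit() here; the atomic cmpxchg in __resched_cpu() below),
both loads can return stale values: the waker skips the IPI because it still
sees the polling bit, while the idle CPU misses NEED_RESCHED and sleeps. A
hedged userspace litmus sketch of that pairing, with C11 seq_cst atomics
standing in for the kernel barriers (names illustrative, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int polling, need_resched;
static int idle_saw_resched, waker_saw_polling;

static void *idle_side(void *arg)
{
	(void)arg;
	/* current_clr_polling_and_test(): clear, full barrier, then test */
	atomic_store(&polling, 0);
	idle_saw_resched = atomic_load(&need_resched);
	return NULL;
}

static void *waker_side(void *arg)
{
	(void)arg;
	/* resched path: set NEED_RESCHED, full barrier, then test polling */
	atomic_store(&need_resched, 1);
	waker_saw_polling = atomic_load(&polling);
	return NULL;
}

int main(void)
{
	for (int i = 0; i < 10000; i++) {
		pthread_t a, b;

		atomic_store(&polling, 1);
		atomic_store(&need_resched, 0);
		pthread_create(&a, NULL, idle_side, NULL);
		pthread_create(&b, NULL, waker_side, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		/*
		 * Sequential consistency forbids the lost-wakeup outcome:
		 * either the idle side sees NEED_RESCHED and aborts the
		 * sleep, or the waker sees the polling bit already clear
		 * and sends the IPI. With relaxed ordering, both sides
		 * could read the stale values below.
		 */
		if (!idle_saw_resched && waker_saw_polling)
			printf("lost wakeup at iteration %d\n", i);
	}
	puts("done: no lost wakeups");
	return 0;
}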
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 277f494..88d2b40 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -10,6 +10,7 @@
 #include <asm/tlb.h>
 
 #include <trace/events/power.h>
+#include "../sched/sched.h"
 
 static int __read_mostly cpu_idle_force_poll;
@@ -70,6 +71,8 @@ static void cpu_idle_loop(void)
 	while (1) {
 		tick_nohz_idle_enter();
 
+		current_set_polling();
+
 		while (!need_resched()) {
 			check_pgt_cache();
 			rmb();
@@ -92,30 +95,25 @@ static void cpu_idle_loop(void)
 			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 				cpu_idle_poll();
 			} else {
-				if (!current_clr_polling_and_test()) {
-					stop_critical_timings();
-					rcu_idle_enter();
-					arch_cpu_idle();
-					WARN_ON_ONCE(irqs_disabled());
-					rcu_idle_exit();
-					start_critical_timings();
-				} else {
-					local_irq_enable();
-				}
-				__current_set_polling();
+				stop_critical_timings();
+				rcu_idle_enter();
+				arch_cpu_idle();
+				WARN_ON_ONCE(irqs_disabled());
+				rcu_idle_exit();
+				start_critical_timings();
 			}
 
 			arch_cpu_idle_exit();
 		}
 
+		current_clr_polling();
+
 		/*
-		 * Since we fell out of the loop above, we know
-		 * TIF_NEED_RESCHED must be set, propagate it into
-		 * PREEMPT_NEED_RESCHED.
-		 *
-		 * This is required because for polling idle loops we will
-		 * not have had an IPI to fold the state for us.
+		 * We need to test and propagate the TIF_NEED_RESCHED bit here
+		 * because we might not have sent the reschedule IPI to idle
+		 * tasks.
 		 */
-		preempt_set_need_resched();
+		set_preempt_need_resched();
+		sched_ttwu_pending();
 		tick_nohz_idle_exit();
 		schedule_preempt_disabled();
 	}
@@ -138,7 +136,6 @@ void cpu_startup_entry(enum cpuhp_state state)
 	 */
 	boot_init_stack_canary();
 #endif
-	__current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
 }

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b46131e..4b5f168 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -504,45 +504,63 @@ static inline void init_hrtick(void)
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * resched_task - mark a task 'to be rescheduled now'.
- *
- * On UP this means the setting of the need_resched flag, on SMP it
- * might also involve a cross-CPU call to trigger the scheduler on
- * the target CPU.
- */
-void resched_task(struct task_struct *p)
+static void __resched_cpu(int cpu)
 {
-	int cpu;
+	struct rq *rq = cpu_rq(cpu);
+	struct task_struct *p;
+	struct thread_info *ti;
+	unsigned long val, old, new;
+	bool ipi;
 
-	lockdep_assert_held(&task_rq(p)->lock);
+	rcu_read_lock();
+	do {
+		p = ACCESS_ONCE(rq->curr);
+		ti = task_thread_info(p);
+
+		val = ti->flags;
+		for (;;) {
+			new = val | _TIF_NEED_RESCHED;
+			old = cmpxchg(&ti->flags, val, new);
+			if (old == val)
+				break;
+			val = old;
+		}
 
-	if (test_tsk_need_resched(p))
-		return;
+		ipi = !(old & _TIF_POLLING_NRFLAG);
 
-	set_tsk_need_resched(p);
+	} while (p != ACCESS_ONCE(rq->curr));
+	rcu_read_unlock();
 
-	cpu = task_cpu(p);
+	if (ipi)
+		smp_send_reschedule(cpu);
+}
+
+void resched_cpu(int cpu)
+{
 	if (cpu == smp_processor_id()) {
+		set_tsk_need_resched(current);
 		set_preempt_need_resched();
 		return;
 	}
 
-	/* NEED_RESCHED must be visible before we test polling */
-	smp_mb();
-	if (!tsk_is_polling(p))
-		smp_send_reschedule(cpu);
+	__resched_cpu(cpu);
 }
 
-void resched_cpu(int cpu)
+/*
+ * resched_task - mark a task 'to be rescheduled now'.
+ *
+ * On UP this means the setting of the need_resched flag, on SMP it
+ * might also involve a cross-CPU call to trigger the scheduler on
+ * the target CPU.
+ */
+void resched_task(struct task_struct *p)
 {
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
+	lockdep_assert_held(&task_rq(p)->lock);
 
-	if (!raw_spin_trylock_irqsave(&rq->lock, flags))
+	if (test_tsk_need_resched(p))
 		return;
-	resched_task(cpu_curr(cpu));
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	resched_cpu(task_cpu(p));
 }
 
 #ifdef CONFIG_SMP
@@ -1476,7 +1494,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+void sched_ttwu_pending(void)
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
@@ -2707,7 +2725,7 @@ need_resched:
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
 
-	clear_tsk_need_resched(prev);
+	clear_tsk_need_resched(next);
 	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
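__resched_cpu() above is the waker's half of that pairing, folded into one
atomic operation: the cmpxchg loop installs _TIF_NEED_RESCHED and, from the
returned old value, learns race-free whether the target still had
_TIF_POLLING_NRFLAG set, so the separate smp_mb()-then-tsk_is_polling() step
is no longer needed. A userspace sketch of the same idiom with C11
compare-exchange (illustrative names, not kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define _TIF_NEED_RESCHED	(1UL << 3)
#define _TIF_POLLING_NRFLAG	(1UL << 21)

static _Atomic unsigned long ti_flags = _TIF_POLLING_NRFLAG;

static int set_nr_and_test_polling(_Atomic unsigned long *flags)
{
	unsigned long val = atomic_load(flags);

	/* cmpxchg loop: retry until NEED_RESCHED is installed atomically;
	 * on failure, compare_exchange refreshes 'val' with the current
	 * value, so the loop always operates on fresh contents. */
	while (!atomic_compare_exchange_weak(flags, &val,
					     val | _TIF_NEED_RESCHED))
		;

	/* 'val' is the old value: it tells us whether the target was
	 * polling at the instant the flag went in. */
	return !(val & _TIF_POLLING_NRFLAG);
}

int main(void)
{
	if (set_nr_and_test_polling(&ti_flags))
		puts("target not polling: send the reschedule IPI");
	else
		puts("target polling on its flags word: skip the IPI");
	return 0;
}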
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c2119fd..f4cb63a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1173,6 +1173,7 @@ extern const struct sched_class idle_sched_class;
 
 #ifdef CONFIG_SMP
 
+extern void sched_ttwu_pending(void);
 extern void update_group_power(struct sched_domain *sd, int cpu);
 extern void trigger_load_balance(struct rq *rq);
 
@@ -1183,6 +1184,8 @@ extern void idle_exit_fair(struct rq *this_rq);
 
 #else	/* CONFIG_SMP */
 
+static inline void sched_ttwu_pending(void) { }
+
 static inline void idle_balance(int cpu, struct rq *rq) { }
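With sched_ttwu_pending() exported above (and stubbed out on UP), the
protocol the reworked idle loop relies on condenses to the sketch below.
This is illustrative pseudo-C summarizing the kernel/cpu/idle.c hunks, not a
compilable excerpt:

	current_set_polling();		/* wakers may now skip the IPI */
	while (!need_resched())
		arch_cpu_idle();	/* mwait/halt, monitor armed on flags */
	current_clr_polling();		/* wakers must send the IPI again */
	set_preempt_need_resched();	/* fold: no scheduler_ipi() ran here */
	sched_ttwu_pending();		/* drain wakeups queued without an IPI */
	schedule_preempt_disabled();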