It's a rather large function; inlining it doesn't seem to make much sense. $ size defconfig-build/kernel/sched/core.o{.orig,} text data bss dec hex filename 56533 21037 2320 79890 13812 defconfig-build/kernel/sched/core.o.orig 55733 21037 2320 79090 134f2 defconfig-build/kernel/sched/core.o $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i ; done $ perf stat --null --repeat 25 -- perf bench sched messaging -g 40 -l 5000 pre: 4.582798193 seconds time elapsed ( +- 1.41% ) 4.733374877 seconds time elapsed ( +- 2.10% ) 4.560955136 seconds time elapsed ( +- 1.43% ) 4.631062303 seconds time elapsed ( +- 1.40% ) post: 4.364765213 seconds time elapsed ( +- 0.91% ) 4.454442734 seconds time elapsed ( +- 1.18% ) 4.448893817 seconds time elapsed ( +- 1.41% ) 4.424346872 seconds time elapsed ( +- 0.97% ) Signed-off-by: Peter Zijlstra (Intel) --- --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -170,6 +170,71 @@ static struct rq *this_rq_lock(void) return rq; } +/* + * __task_rq_lock - lock the rq @p resides on. + */ +struct rq *__task_rq_lock(struct task_struct *p) + __acquires(rq->lock) +{ + struct rq *rq; + + lockdep_assert_held(&p->pi_lock); + + for (;;) { + rq = task_rq(p); + raw_spin_lock(&rq->lock); + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { + lockdep_pin_lock(&rq->lock); + return rq; + } + raw_spin_unlock(&rq->lock); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + +/* + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. 
+ */ +struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) + __acquires(p->pi_lock) + __acquires(rq->lock) +{ + struct rq *rq; + + for (;;) { + raw_spin_lock_irqsave(&p->pi_lock, *flags); + rq = task_rq(p); + raw_spin_lock(&rq->lock); + /* + * move_queued_task() task_rq_lock() + * + * ACQUIRE (rq->lock) + * [S] ->on_rq = MIGRATING [L] rq = task_rq() + * WMB (__set_task_cpu()) ACQUIRE (rq->lock); + * [S] ->cpu = new_cpu [L] task_rq() + * [L] ->on_rq + * RELEASE (rq->lock) + * + * If we observe the old cpu in task_rq_lock, the acquire of + * the old rq->lock will fully serialize against the stores. + * + * If we observe the new cpu in task_rq_lock, the acquire will + * pair with the WMB to ensure we must then also see migrating. + */ + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { + lockdep_pin_lock(&rq->lock); + return rq; + } + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + #ifdef CONFIG_SCHED_HRTICK /* * Use HR-timers to deliver accurate preemption points. --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1451,70 +1451,11 @@ static inline void sched_rt_avg_update(s static inline void sched_avg_update(struct rq *rq) { } #endif -/* - * __task_rq_lock - lock the rq @p resides on. - */ -static inline struct rq *__task_rq_lock(struct task_struct *p) - __acquires(rq->lock) -{ - struct rq *rq; - - lockdep_assert_held(&p->pi_lock); - - for (;;) { - rq = task_rq(p); - raw_spin_lock(&rq->lock); - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - lockdep_pin_lock(&rq->lock); - return rq; - } - raw_spin_unlock(&rq->lock); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} - -/* - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. 
- */ -static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) +struct rq *__task_rq_lock(struct task_struct *p) + __acquires(rq->lock); +struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) __acquires(p->pi_lock) - __acquires(rq->lock) -{ - struct rq *rq; - - for (;;) { - raw_spin_lock_irqsave(&p->pi_lock, *flags); - rq = task_rq(p); - raw_spin_lock(&rq->lock); - /* - * move_queued_task() task_rq_lock() - * - * ACQUIRE (rq->lock) - * [S] ->on_rq = MIGRATING [L] rq = task_rq() - * WMB (__set_task_cpu()) ACQUIRE (rq->lock); - * [S] ->cpu = new_cpu [L] task_rq() - * [L] ->on_rq - * RELEASE (rq->lock) - * - * If we observe the old cpu in task_rq_lock, the acquire of - * the old rq->lock will fully serialize against the stores. - * - * If we observe the new cpu in task_rq_lock, the acquire will - * pair with the WMB to ensure we must then also see migrating. - */ - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - lockdep_pin_lock(&rq->lock); - return rq; - } - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} + __acquires(rq->lock); static inline void __task_rq_unlock(struct rq *rq) __releases(rq->lock)