Suggested-by: Brendan Jackman
Signed-off-by: Peter Zijlstra (Intel)
---
 kernel/sched/core.c  |    1 +
 kernel/sched/fair.c  |   47 +++++++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h |    1 +
 3 files changed, 43 insertions(+), 6 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6002,6 +6002,7 @@ void __init sched_init(void)
 		rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ_COMMON
 		rq->last_load_update_tick = jiffies;
+		rq->last_blocked_load_update_tick = jiffies;
 		atomic_set(&rq->nohz_flags, 0);
 #endif
 #ifdef CONFIG_NO_HZ_FULL
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5379,6 +5379,14 @@ decay_load_missed(unsigned long load, un
 	}
 	return load;
 }
+
+static struct {
+	cpumask_var_t idle_cpus_mask;
+	atomic_t nr_cpus;
+	unsigned long next_balance;	/* in jiffy units */
+	unsigned long next_stats;
+} nohz ____cacheline_aligned;
+
 #endif /* CONFIG_NO_HZ_COMMON */
 
 /**
@@ -6942,6 +6950,7 @@ enum fbq_type { regular, remote, all };
 #define LBF_NEED_BREAK	0x02
 #define LBF_DST_PINNED	0x04
 #define LBF_SOME_PINNED	0x08
+#define LBF_NOHZ_STATS	0x10
 
 struct lb_env {
 	struct sched_domain	*sd;
@@ -7380,6 +7389,10 @@ static void update_blocked_averages(int
 		if (cfs_rq_is_decayed(cfs_rq))
 			list_del_leaf_cfs_rq(cfs_rq);
 	}
+
+#ifdef CONFIG_NO_HZ_COMMON
+	rq->last_blocked_load_update_tick = jiffies;
+#endif
 	rq_unlock_irqrestore(rq, &rf);
 }
 
@@ -7439,6 +7452,9 @@ static inline void update_blocked_averag
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+#ifdef CONFIG_NO_HZ_COMMON
+	rq->last_blocked_load_update_tick = jiffies;
+#endif
 	rq_unlock_irqrestore(rq, &rf);
 }
 
@@ -7773,6 +7789,19 @@ group_type group_classify(struct sched_g
 	return group_other;
 }
 
+static void update_nohz_stats(struct rq *rq)
+{
+	unsigned int cpu = rq->cpu;
+
+	if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+		return;
+
+	if (!time_after(jiffies, rq->last_blocked_load_update_tick))
+		return;
+
+	update_blocked_averages(cpu);
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
@@ -7795,6 +7824,9 @@ static inline void update_sg_lb_stats(st
 	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
+		if (env->flags & LBF_NOHZ_STATS)
+			update_nohz_stats(rq);
+
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
 			load = target_load(i, load_idx);
@@ -7950,6 +7982,15 @@ static inline void update_sd_lb_stats(st
 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;
 
+#ifdef CONFIG_NO_HZ_COMMON
+	if (env->idle == CPU_NEWLY_IDLE) {
+		env->flags |= LBF_NOHZ_STATS;
+
+		if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
+			nohz.next_stats = jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD);
+	}
+#endif
+
 	load_idx = get_sd_load_idx(env->sd, env->idle);
 
 	do {
@@ -8968,12 +9009,6 @@ static inline int on_null_domain(struct
  * needed, they will kick the idle load balancer, which then does idle
  * load balancing for all the idle CPUs.
  */
-static struct {
-	cpumask_var_t idle_cpus_mask;
-	atomic_t nr_cpus;
-	unsigned long next_balance;	/* in jiffy units */
-	unsigned long next_stats;
-} nohz ____cacheline_aligned;
 
 static inline int find_new_ilb(void)
 {
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -722,6 +722,7 @@ struct rq {
 #ifdef CONFIG_NO_HZ_COMMON
 #ifdef CONFIG_SMP
 	unsigned long last_load_update_tick;
+	unsigned long last_blocked_load_update_tick;
 #endif /* CONFIG_SMP */
 	atomic_t nohz_flags;
 #endif /* CONFIG_NO_HZ_COMMON */
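
[Aside, not part of the patch: a minimal userspace sketch of the wrap-safe
time_after() gating that update_nohz_stats() relies on above, which limits
blocked-load updates to at most once per jiffy per CPU. demo_time_after()
and the tick values are illustrative stand-ins, not kernel code.]

#include <stdio.h>

/* Mirrors the kernel's time_after(a, b): true iff @a is later than @b,
 * using a signed difference so counter wrap-around is handled. */
static int demo_time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	unsigned long jiffies = 1000;
	unsigned long last_blocked_load_update_tick = 1000;

	/* Updated this tick: update_nohz_stats() would return early. */
	printf("stale? %d\n",
	       demo_time_after(jiffies, last_blocked_load_update_tick));

	/* One tick later the blocked averages count as stale again. */
	jiffies++;
	printf("stale? %d\n",
	       demo_time_after(jiffies, last_blocked_load_update_tick));
	return 0;
}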