Add a few knobs to poke while playing with the new code. Not-Signed-off-by: Peter Zijlstra (Intel) --- include/linux/sched/sysctl.h | 1 kernel/sched/fair.c | 86 ++++++++++++++++++++++++++++++++++--------- kernel/sched/features.h | 10 +++++ kernel/sysctl.c | 7 +++ 4 files changed, 86 insertions(+), 18 deletions(-) --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -37,6 +37,7 @@ extern unsigned int sysctl_sched_migrati extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_sched_shares_window; +extern unsigned int sysctl_sched_shift; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -114,6 +114,8 @@ unsigned int __read_mostly sysctl_sched_ unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +const_debug unsigned int sysctl_sched_shift = 9; + static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; @@ -5354,18 +5356,24 @@ static inline int select_idle_smt(struct static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) { struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); - u64 avg_idle = this_rq()->avg_idle; - u64 avg_cost = this_sd->avg_scan_cost; u64 time, cost; s64 delta; int cpu, wrap; - /* - * Due to large variance we need a large fuzz factor; hackbench in - * particularly is sensitive here. - */ - if ((avg_idle / 512) < avg_cost) - return -1; + if (sched_feat(AVG_CPU)) { + u64 avg_idle = this_rq()->avg_idle; + u64 avg_cost = this_sd->avg_scan_cost; + + if (sched_feat(PRINT_AVG)) + trace_printk("idle: %Ld cost: %Ld\n", avg_idle, avg_cost); + + /* + * Due to large variance we need a large fuzz factor; hackbench in + * particular is sensitive here. 
+ */ + if ((avg_idle >> sysctl_sched_shift) < avg_cost) + return -1; + } time = local_clock(); @@ -5379,6 +5387,7 @@ static int select_idle_cpu(struct task_s time = local_clock() - time; cost = this_sd->avg_scan_cost; delta = (s64)(time - cost) / 8; + /* trace_printk("time: %Ld cost: %Ld delta: %Ld\n", time, cost, delta); */ this_sd->avg_scan_cost += delta; return cpu; @@ -5390,7 +5399,7 @@ static int select_idle_cpu(struct task_s static int select_idle_sibling(struct task_struct *p, int target) { struct sched_domain *sd; - int i = task_cpu(p); + int start, i = task_cpu(p); if (idle_cpu(target)) return target; @@ -5401,21 +5410,62 @@ static int select_idle_sibling(struct ta if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) return i; + start = target; + if (sched_feat(ORDER_IDLE)) + start = per_cpu(sd_llc_id, target); /* first cpu in llc domain */ + sd = rcu_dereference(per_cpu(sd_llc, target)); if (!sd) return target; - i = select_idle_core(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + if (sched_feat(OLD_IDLE)) { + struct sched_group *sg; - i = select_idle_cpu(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + for_each_lower_domain(sd) { + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + /* Ensure the entire group is idle */ + for_each_cpu(i, sched_group_cpus(sg)) { + if (i == target || !idle_cpu(i)) + goto next; + } - i = select_idle_smt(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + /* + * It doesn't matter which cpu we pick, the + * whole group is idle. 
+ */ + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; +next: + sg = sg->next; + } while (sg != sd->groups); + } +done: + return target; + } + + if (sched_feat(IDLE_CORE)) { + i = select_idle_core(p, sd, start); + if ((unsigned)i < nr_cpumask_bits) + return i; + } + + if (sched_feat(IDLE_CPU)) { + i = select_idle_cpu(p, sd, start); + if ((unsigned)i < nr_cpumask_bits) + return i; + } + + if (sched_feat(IDLE_SMT)) { + i = select_idle_smt(p, sd, start); + if ((unsigned)i < nr_cpumask_bits) + return i; + } return target; } --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -69,3 +69,13 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) +SCHED_FEAT(OLD_IDLE, false) +SCHED_FEAT(ORDER_IDLE, false) + +SCHED_FEAT(IDLE_CORE, true) +SCHED_FEAT(IDLE_CPU, true) +SCHED_FEAT(AVG_CPU, true) +SCHED_FEAT(PRINT_AVG, false) + +SCHED_FEAT(IDLE_SMT, true) + --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -334,6 +334,13 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "sched_shift", + .data = &sysctl_sched_shift, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "sched_nr_migrate", .data = &sysctl_sched_nr_migrate, .maxlen = sizeof(unsigned int),