[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <9ac6da7f862347b5af117db74d4d4254@hisilicon.com>
Date: Tue, 16 Mar 2021 07:33:35 +0000
From: "Song Bao Hua (Barry Song)" <song.bao.hua@...ilicon.com>
To: Peter Zijlstra <peterz@...radead.org>
CC: "tim.c.chen@...ux.intel.com" <tim.c.chen@...ux.intel.com>,
"catalin.marinas@....com" <catalin.marinas@....com>,
"will@...nel.org" <will@...nel.org>,
"rjw@...ysocki.net" <rjw@...ysocki.net>,
"vincent.guittot@...aro.org" <vincent.guittot@...aro.org>,
"bp@...en8.de" <bp@...en8.de>,
"tglx@...utronix.de" <tglx@...utronix.de>,
"mingo@...hat.com" <mingo@...hat.com>,
"lenb@...nel.org" <lenb@...nel.org>,
"dietmar.eggemann@....com" <dietmar.eggemann@....com>,
"rostedt@...dmis.org" <rostedt@...dmis.org>,
"bsegall@...gle.com" <bsegall@...gle.com>,
"mgorman@...e.de" <mgorman@...e.de>,
"msys.mizuma@...il.com" <msys.mizuma@...il.com>,
"valentin.schneider@....com" <valentin.schneider@....com>,
"gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
Jonathan Cameron <jonathan.cameron@...wei.com>,
"juri.lelli@...hat.com" <juri.lelli@...hat.com>,
"mark.rutland@....com" <mark.rutland@....com>,
"sudeep.holla@....com" <sudeep.holla@....com>,
"aubrey.li@...ux.intel.com" <aubrey.li@...ux.intel.com>,
"linux-arm-kernel@...ts.infradead.org"
<linux-arm-kernel@...ts.infradead.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-acpi@...r.kernel.org" <linux-acpi@...r.kernel.org>,
"x86@...nel.org" <x86@...nel.org>, "xuwei (O)" <xuwei5@...wei.com>,
"Zengtao (B)" <prime.zeng@...ilicon.com>,
"guodong.xu@...aro.org" <guodong.xu@...aro.org>,
yangyicong <yangyicong@...wei.com>,
"Liguozhu (Kenneth)" <liguozhu@...ilicon.com>,
"linuxarm@...neuler.org" <linuxarm@...neuler.org>,
"hpa@...or.com" <hpa@...or.com>
Subject: RE: [RFC PATCH v4 2/3] scheduler: add scheduler level for clusters
> -----Original Message-----
> From: Peter Zijlstra [mailto:peterz@...radead.org]
> Sent: Tuesday, March 2, 2021 11:43 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@...ilicon.com>
> Cc: tim.c.chen@...ux.intel.com; catalin.marinas@....com; will@...nel.org;
> rjw@...ysocki.net; vincent.guittot@...aro.org; bp@...en8.de;
> tglx@...utronix.de; mingo@...hat.com; lenb@...nel.org;
> dietmar.eggemann@....com; rostedt@...dmis.org; bsegall@...gle.com;
> mgorman@...e.de; msys.mizuma@...il.com; valentin.schneider@....com;
> gregkh@...uxfoundation.org; Jonathan Cameron <jonathan.cameron@...wei.com>;
> juri.lelli@...hat.com; mark.rutland@....com; sudeep.holla@....com;
> aubrey.li@...ux.intel.com; linux-arm-kernel@...ts.infradead.org;
> linux-kernel@...r.kernel.org; linux-acpi@...r.kernel.org; x86@...nel.org;
> xuwei (O) <xuwei5@...wei.com>; Zengtao (B) <prime.zeng@...ilicon.com>;
> guodong.xu@...aro.org; yangyicong <yangyicong@...wei.com>; Liguozhu (Kenneth)
> <liguozhu@...ilicon.com>; linuxarm@...neuler.org; hpa@...or.com
> Subject: Re: [RFC PATCH v4 2/3] scheduler: add scheduler level for clusters
>
> On Tue, Mar 02, 2021 at 11:59:39AM +1300, Barry Song wrote:
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 88a2e2b..d805e59 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -7797,6 +7797,16 @@ int sched_cpu_activate(unsigned int cpu)
> > if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
> > static_branch_inc_cpuslocked(&sched_smt_present);
> > #endif
> > +
> > +#ifdef CONFIG_SCHED_CLUSTER
> > + /*
> > + * When going up, increment the number of cluster cpus with
> > + * cluster present.
> > + */
> > + if (cpumask_weight(cpu_cluster_mask(cpu)) > 1)
> > + static_branch_inc_cpuslocked(&sched_cluster_present);
> > +#endif
> > +
> > set_cpu_active(cpu, true);
> >
> > if (sched_smp_initialized) {
> > @@ -7873,6 +7883,14 @@ int sched_cpu_deactivate(unsigned int cpu)
> > static_branch_dec_cpuslocked(&sched_smt_present);
> > #endif
> >
> > +#ifdef CONFIG_SCHED_CLUSTER
> > + /*
> > + * When going down, decrement the number of cpus with cluster present.
> > + */
> > + if (cpumask_weight(cpu_cluster_mask(cpu)) > 1)
> > + static_branch_dec_cpuslocked(&sched_cluster_present);
> > +#endif
> > +
> > if (!sched_smp_initialized)
> > return 0;
>
> I don't think that's correct. IIUC this will mean the
> sched_cluster_present thing will be enabled on anything with SMT (very
> much including x86 big cores after the next patch).
>
> I'm thinking that at the very least you should check a CLS domain
> exists, but that might be hard at this point, because the sched domains
> haven't been built yet.
We might be able to achieve the same goal with something like:
int cls_wt = cpumask_weight(cpu_cluster_mask(cpu));
if ((cls_wt > cpumask_weight(cpu_smt_mask(cpu))) &&
(cls_wt < cpumask_weight(cpu_coregroup_mask(cpu))))
sched_cluster_present...
>
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 8a8bd7b..3db7b07 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6009,6 +6009,11 @@ static inline int __select_idle_cpu(int cpu)
> > return -1;
> > }
> >
> > +#ifdef CONFIG_SCHED_CLUSTER
> > +DEFINE_STATIC_KEY_FALSE(sched_cluster_present);
> > +EXPORT_SYMBOL_GPL(sched_cluster_present);
>
> I really rather think this shouldn't be exported
OK, that makes sense.
>
> > +#endif
> > +
> > #ifdef CONFIG_SCHED_SMT
> > DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> > EXPORT_SYMBOL_GPL(sched_smt_present);
>
> This is a KVM wart, it needs to know because mitigation crap.
>
Ok.
> > @@ -6116,6 +6121,26 @@ static inline int select_idle_core(struct task_struct
> *p, int core, struct cpuma
> >
> > #endif /* CONFIG_SCHED_SMT */
> >
> > +static inline int _select_idle_cpu(bool smt, struct task_struct *p, int
> target, struct cpumask *cpus, int *idle_cpu, int *nr)
> > +{
> > + int cpu, i;
> > +
> > + for_each_cpu_wrap(cpu, cpus, target) {
> > + if (smt) {
> > + i = select_idle_core(p, cpu, cpus, idle_cpu);
> > + } else {
> > + if (!--*nr)
> > + return -1;
> > + i = __select_idle_cpu(cpu);
> > + }
> > +
> > + if ((unsigned int)i < nr_cpumask_bits)
> > + return i;
> > + }
> > +
> > + return -1;
> > +}
> > +
> > /*
> > * Scan the LLC domain for idle CPUs; this is dynamically regulated by
> > * comparing the average scan cost (tracked in sd->avg_scan_cost) against
> the
> > @@ -6124,7 +6149,7 @@ static inline int select_idle_core(struct task_struct
> *p, int core, struct cpuma
> > static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd,
> int target)
> > {
> > struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> > - int i, cpu, idle_cpu = -1, nr = INT_MAX;
> > + int i, idle_cpu = -1, nr = INT_MAX;
> > bool smt = test_idle_cores(target, false);
> > int this = smp_processor_id();
> > struct sched_domain *this_sd;
> > @@ -6134,7 +6159,12 @@ static int select_idle_cpu(struct task_struct *p,
> struct sched_domain *sd, int t
> > if (!this_sd)
> > return -1;
> >
> > - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> > + if (!sched_cluster_active())
> > + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> > +#ifdef CONFIG_SCHED_CLUSTER
> > + if (sched_cluster_active())
> > + cpumask_and(cpus, cpu_cluster_mask(target), p->cpus_ptr);
> > +#endif
> >
> > if (sched_feat(SIS_PROP) && !smt) {
> > u64 avg_cost, avg_idle, span_avg;
> > @@ -6155,24 +6185,32 @@ static int select_idle_cpu(struct task_struct *p,
> struct sched_domain *sd, int t
> > time = cpu_clock(this);
> > }
> >
> > - for_each_cpu_wrap(cpu, cpus, target) {
> > - if (smt) {
> > - i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > - if ((unsigned int)i < nr_cpumask_bits)
> > - return i;
> > + /* scan cluster before scanning the whole llc */
> > +#ifdef CONFIG_SCHED_CLUSTER
> > + if (sched_cluster_active()) {
> > + i = _select_idle_cpu(smt, p, target, cpus, &idle_cpu, &nr);
> > + if ((unsigned int) i < nr_cpumask_bits) {
> > + idle_cpu = i;
> > + goto done;
> > + } else if (nr <= 0)
> > + return -1;
> >
> > - } else {
> > - if (!--nr)
> > - return -1;
> > - idle_cpu = __select_idle_cpu(cpu);
> > - if ((unsigned int)idle_cpu < nr_cpumask_bits)
> > - break;
> > - }
> > + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> > + cpumask_andnot(cpus, cpus, cpu_cluster_mask(target));
> > }
> > +#endif
> > +
> > + i = _select_idle_cpu(smt, p, target, cpus, &idle_cpu, &nr);
> > + if ((unsigned int) i < nr_cpumask_bits) {
> > + idle_cpu = i;
> > + goto done;
> > + } else if (nr <= 0)
> > + return -1;
> >
> > if (smt)
> > set_idle_cores(this, false);
> >
> > +done:
> > if (sched_feat(SIS_PROP) && !smt) {
> > time = cpu_clock(this) - time;
> > update_avg(&this_sd->avg_scan_cost, time);
>
> And this is just horrific :-(
I was actually struggling quite a bit with this part.
I had tried a couple of approaches before sending this,
but the version I sent is still quite ugly.
Thanks
Barry
Powered by blists - more mailing lists