Message-ID: <b8e200da-9d13-a194-5251-31f16ab882f2@bytedance.com>
Date: Mon, 15 Aug 2022 17:49:07 +0800
From: Abel Wu <wuyun.abel@...edance.com>
To: "Gautham R. Shenoy" <gautham.shenoy@....com>
Cc: Peter Zijlstra <peterz@...radead.org>,
Mel Gorman <mgorman@...e.de>,
Vincent Guittot <vincent.guittot@...aro.org>,
Josh Don <joshdon@...gle.com>, Chen Yu <yu.c.chen@...el.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
K Prateek Nayak <kprateek.nayak@....com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 5/7] sched/fair: skip SIS domain search if fully busy
Hi Gautham, thanks for your review and sorry for my late reply.
On 7/20/22 11:34 PM, Gautham R. Shenoy wrote:
>
> [..snip..]
>
>> @@ -6197,24 +6201,44 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p)
>> DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>> EXPORT_SYMBOL_GPL(sched_smt_present);
>>
>> -static inline void set_idle_cores(int cpu, int val)
>> +static inline void sd_set_state(int cpu, enum sd_state state)
>
> Nit: We are setting the state of only the LLC domain and not any other
> domain via this function. So should we name it as
> set_llc_state()/get_llc_state() for better readability ?
>
Makes sense, will rename them in the next version.
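Roughly like this for the next version (only the naming changes, the
bodies stay exactly as in the hunk below):

static inline void set_llc_state(int cpu, enum sd_state state)
{
        struct sched_domain_shared *sds;

        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
        if (sds)
                WRITE_ONCE(sds->state, state);
}

static inline enum sd_state get_llc_state(int cpu)
{
        struct sched_domain_shared *sds;

        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
        if (sds)
                return READ_ONCE(sds->state);

        return sd_has_icpus;
}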
>
>> {
>> struct sched_domain_shared *sds;
>>
>> sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
>> if (sds)
>> - WRITE_ONCE(sds->has_idle_cores, val);
>> + WRITE_ONCE(sds->state, state);
>> }
>>
>> -static inline bool test_idle_cores(int cpu)
>> +static inline enum sd_state sd_get_state(int cpu)
>> {
>> struct sched_domain_shared *sds;
>>
>> sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
>> if (sds)
>> - return READ_ONCE(sds->has_idle_cores);
>> + return READ_ONCE(sds->state);
>>
>> - return false;
>> + return sd_has_icpus;
>> +}
>> +
>> +static inline void set_idle_cores(int cpu, int idle)
>                                                   ^^^^^
> I agree with Josh. We can use core_idle instead of idle here.
OK, I will give the parameter a more descriptive name.
>
>> +{
>> + sd_set_state(cpu, idle ? sd_has_icores : sd_has_icpus);
>> +}
>> +
>> +static inline bool test_idle_cores(int cpu)
>> +{
>> + return sd_get_state(cpu) == sd_has_icores;
>> +}
>> +
>> +static inline void set_idle_cpus(int cpu, int idle)
And this one too.
>> +{
>> + sd_set_state(cpu, idle ? sd_has_icpus : sd_is_busy);
>> +}
>> +
>> +static inline bool test_idle_cpus(int cpu)
>> +{
>> + return sd_get_state(cpu) != sd_is_busy;
>> }
>>
>> /*
>
> [...]
>
>
>> @@ -8661,6 +8702,12 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
>> return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
>> }
>>
>> +static inline void sd_classify(struct sd_lb_stats *sds, struct rq *rq)
>> +{
>> + if (sds->sd_state != sd_has_icpus && unoccupied_rq(rq))
>
> Nit: sds->sd_state can either be sd_has_icpus or sd_is_busy. So for
> better readability, we can just use the positive check
For now, yes. But sd_state may be expanded later, and once that happens
the positive check could become error-prone.
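For example (sd_overloaded below is just a made-up future state to
illustrate the point, it is not part of this series):

        /*
         * Positive check: hard-wired to sd_is_busy, so it would silently
         * miss a later-added state such as sd_overloaded.
         */
        if (sds->sd_state == sd_is_busy && unoccupied_rq(rq))
                sds->sd_state = sd_has_icpus;

        /*
         * Negative check: keyed on the target state only, so it keeps
         * working no matter which states are added later.
         */
        if (sds->sd_state != sd_has_icpus && unoccupied_rq(rq))
                sds->sd_state = sd_has_icpus;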
>
> if (sds->sd_state == sd_is_busy && unoccupied_rq(rq))
> sds->sd_state = sd_has_icpus;
>
>
>> + sds->sd_state = sd_has_icpus;
>> +}
>> +
>> /**
>> * update_sg_lb_stats - Update sched_group's statistics for load balancing.
>> * @env: The load balancing environment.
>> @@ -8675,11 +8722,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>> struct sg_lb_stats *sgs,
>> int *sg_status)
>> {
>> - int i, nr_running, local_group;
>> + int i, nr_running, local_group, update_core;
>>
>> memset(sgs, 0, sizeof(*sgs));
>>
>> local_group = group == sds->local;
>> + update_core = env->sd->flags & SD_SHARE_CPUCAPACITY;
>>
>> for_each_cpu_and(i, sched_group_span(group), env->cpus) {
>> struct rq *rq = cpu_rq(i);
>> @@ -8692,6 +8740,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>> nr_running = rq->nr_running;
>> sgs->sum_nr_running += nr_running;
>>
>> + if (update_core)
>> + sd_classify(sds, rq);
>> +
>> if (nr_running > 1)
>> *sg_status |= SG_OVERLOAD;
>>
>> @@ -9220,6 +9271,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>> return idlest;
>> }
>>
>> +static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>> +{
>> + if (sds->sd_state == sd_has_icpus && !test_idle_cpus(env->dst_cpu))
>> + set_idle_cpus(env->dst_cpu, true);
>
> We could enter this if condition when env->dst_cpu is the only idle
> CPU in the SMT domain (which is likely to be the case every time we do
> a NEW_IDLE balance). By the end of this load-balancing round, the
> env->dst_cpu can pull a task from some other CPU and thereby no longer
> remain idle but the LLC state would still be sd_has_icpus.
>
> That would mean that some CPU on this LLC would do a full scan during
> the wakeup only to find no idle CPU and reset the state to
> sd_is_busy. Have you seen instances where this false-positive pattern
> results in wasteful scans and thereby causes a performance degradation?
> Ideally it should not be worse than what we currently have.
Yes, indeed. We will talk about this later in the 7th patch.
>
> Apart from this, patch looks good to me.
Thanks!
>
> It would be worthwhile to explore whether the LLC state can be used
> early on in select_task_rq_fair() to determine if we need to do a
> wake-affine or allow the task to stick to its previous LLC, depending
> on which of the previous LLC and the waker's LLC has an idle CPU.
>
Sounds like a good idea!
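Just thinking out loud, an untested sketch around the want_affine path
in select_task_rq_fair() (test_idle_cpus() is the helper from this
patch, everything else is hand-waving and would need much more thought):

        if (want_affine) {
                /*
                 * The waker's LLC is busy but the previous LLC still
                 * reports idle CPUs: skip the affine wakeup and let the
                 * task stay on its previous LLC.
                 */
                if (!test_idle_cpus(cpu) && test_idle_cpus(prev_cpu))
                        want_affine = 0;
        }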
Best Regards,
Abel