Message-ID: <YtggcHGKVSbprXr3@BLR-5CG11610CF.amd.com>
Date:   Wed, 20 Jul 2022 21:04:08 +0530
From:   "Gautham R. Shenoy" <gautham.shenoy@....com>
To:     Abel Wu <wuyun.abel@...edance.com>
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Mel Gorman <mgorman@...e.de>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Josh Don <joshdon@...gle.com>, Chen Yu <yu.c.chen@...el.com>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        K Prateek Nayak <kprateek.nayak@....com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 5/7] sched/fair: skip SIS domain search if fully busy

Hello Abel,


On Sun, Jun 19, 2022 at 08:04:49PM +0800, Abel Wu wrote:
> If a full scan of the SIS domain fails, then there are no unoccupied
> cpus available and the LLC is fully busy. In this case we'd better
> spend the time on something more useful, rather than wasting it trying
> to find an idle cpu that probably does not exist.
> 
> The fully busy status will be re-evaluated when any core of this LLC
> domain enters load balancing, and cleared once idle cpus are found.
> 
> Signed-off-by: Abel Wu <wuyun.abel@...edance.com>
> ---

[..snip..]

> @@ -6197,24 +6201,44 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p)
>  DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>  EXPORT_SYMBOL_GPL(sched_smt_present);
>  
> -static inline void set_idle_cores(int cpu, int val)
> +static inline void sd_set_state(int cpu, enum sd_state state)

Nit: This function sets the state of only the LLC domain and not of any
other domain. So should we name the pair set_llc_state()/get_llc_state()
for better readability?
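
For illustration, the renamed pair would look like this (same bodies as
in the patch, no functional change intended):

	static inline void set_llc_state(int cpu, enum sd_state state)
	{
		struct sched_domain_shared *sds;

		sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
		if (sds)
			WRITE_ONCE(sds->state, state);
	}

	static inline enum sd_state get_llc_state(int cpu)
	{
		struct sched_domain_shared *sds;

		sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
		if (sds)
			return READ_ONCE(sds->state);

		return sd_has_icpus;
	}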


>  {
>  	struct sched_domain_shared *sds;
>  
>  	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
>  	if (sds)
> -		WRITE_ONCE(sds->has_idle_cores, val);
> +		WRITE_ONCE(sds->state, state);
>  }
>  
> -static inline bool test_idle_cores(int cpu)
> +static inline enum sd_state sd_get_state(int cpu)
>  {
>  	struct sched_domain_shared *sds;
>  
>  	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
>  	if (sds)
> -		return READ_ONCE(sds->has_idle_cores);
> +		return READ_ONCE(sds->state);
>  
> -	return false;
> +	return sd_has_icpus;
> +}
> +
> +static inline void set_idle_cores(int cpu, int idle)
                                                  ^^^^^
I agree with Josh. We can use core_idle instead of idle here.
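
i.e. (illustrative, with only the parameter renamed):

	static inline void set_idle_cores(int cpu, int core_idle)
	{
		sd_set_state(cpu, core_idle ? sd_has_icores : sd_has_icpus);
	}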

> +{
> +	sd_set_state(cpu, idle ? sd_has_icores : sd_has_icpus);
> +}
> +
> +static inline bool test_idle_cores(int cpu)
> +{
> +	return sd_get_state(cpu) == sd_has_icores;
> +}
> +
> +static inline void set_idle_cpus(int cpu, int idle)
> +{
> +	sd_set_state(cpu, idle ? sd_has_icpus : sd_is_busy);
> +}
> +
> +static inline bool test_idle_cpus(int cpu)
> +{
> +	return sd_get_state(cpu) != sd_is_busy;
>  }
>  
>  /*

[...]


> @@ -8661,6 +8702,12 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs
>  	return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
>  }
>  
> +static inline void sd_classify(struct sd_lb_stats *sds, struct rq *rq)
> +{
> +	if (sds->sd_state != sd_has_icpus && unoccupied_rq(rq))

Nit: sds->sd_state can either be sd_has_icpus or sd_is_busy. So for
better readability, we can just use the positive check

	if (sds->sd_state == sd_is_busy && unoccupied_rq(rq))
		sds->sd_state = sd_has_icpus;


> +		sds->sd_state = sd_has_icpus;
> +}
> +
>  /**
>   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
>   * @env: The load balancing environment.
> @@ -8675,11 +8722,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  				      struct sg_lb_stats *sgs,
>  				      int *sg_status)
>  {
> -	int i, nr_running, local_group;
> +	int i, nr_running, local_group, update_core;
>  
>  	memset(sgs, 0, sizeof(*sgs));
>  
>  	local_group = group == sds->local;
> +	update_core = env->sd->flags & SD_SHARE_CPUCAPACITY;
>  
>  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
>  		struct rq *rq = cpu_rq(i);
> @@ -8692,6 +8740,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  		nr_running = rq->nr_running;
>  		sgs->sum_nr_running += nr_running;
>  
> +		if (update_core)
> +			sd_classify(sds, rq);
> +
>  		if (nr_running > 1)
>  			*sg_status |= SG_OVERLOAD;
>  
> @@ -9220,6 +9271,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>  	return idlest;
>  }
>  
> +static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
> +{
> +	if (sds->sd_state == sd_has_icpus && !test_idle_cpus(env->dst_cpu))
> +		set_idle_cpus(env->dst_cpu, true);

We could enter this if-condition when env->dst_cpu is the only idle
CPU in the SMT domain (which is likely to be the case every time we do
a NEW_IDLE balance). By the end of this load-balancing round,
env->dst_cpu can pull a task from some other CPU and thereby no longer
remain idle, but the LLC state would still be sd_has_icpus.

That would mean that some CPU on this LLC would do a full scan during
a wakeup only to find no idle CPU, and then reset the state to
sd_is_busy. Have you seen instances where this false-positive pattern
results in wasteful scans and thereby causes a performance degradation?
Ideally it should not be worse than what we currently have.
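
To spell out the sequence above (an illustrative timeline; sd_classify(),
sd_update_state() and set_idle_cpus() are from this patch, the rest are
existing fair.c paths):

	CPUx (idle, runs a NEW_IDLE balance, env->dst_cpu == CPUx):
	  update_sd_lb_stats()
	    sd_classify()     -> CPUx's rq is unoccupied, so sd_has_icpus
	    sd_update_state() -> set_idle_cpus(CPUx, true)
	  detach_tasks()/attach_tasks(): CPUx pulls a task, no longer idle

	CPUy (a task wakes up on the same LLC):
	  sees sd_has_icpus -> full SIS scan, finds no idle cpu
	  failed scan      -> LLC state reset to sd_is_busy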

Apart from this, the patch looks good to me.

It would be worthwhile to explore whether the LLC state can be used
early on in select_task_rq_fair() to determine whether we need to do a
wake-affine wakeup or allow the task to stick to its previous LLC,
depending on which of the previous LLC and the waker's LLC has an idle
CPU.
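
Purely as an illustration of that idea (llc_pick_cpu() below is a
hypothetical helper, not something in this patch; test_idle_cpus() is
the helper the patch introduces):

	/*
	 * Hypothetical: pick the CPU whose LLC the task should wake on,
	 * given the waker's CPU and the task's previous CPU.
	 */
	static int llc_pick_cpu(int prev_cpu, int waker_cpu)
	{
		/* Wake-affine if the waker's LLC advertises idle CPUs. */
		if (test_idle_cpus(waker_cpu))
			return waker_cpu;

		/*
		 * Otherwise stick to the previous LLC: it either still
		 * has idle CPUs, or both LLCs are busy and the cache-hot
		 * side is the better bet.
		 */
		return prev_cpu;
	}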

--
Thanks and Regards
gautham.
