linux-kernel - Re: [PATCH v4 7/7] sched/fair: de-entropy for SIS filter

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220621182342.GA50611@chenyu5-mobl1>
Date:   Wed, 22 Jun 2022 02:23:42 +0800
From:   Chen Yu <yu.c.chen@...el.com>
To:     Abel Wu <wuyun.abel@...edance.com>
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Mel Gorman <mgorman@...e.de>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Josh Don <joshdon@...gle.com>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        K Prateek Nayak <kprateek.nayak@....com>,
        "Gautham R . Shenoy" <gautham.shenoy@....com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 7/7] sched/fair: de-entropy for SIS filter

On Sun, Jun 19, 2022 at 08:04:51PM +0800, Abel Wu wrote:
> Now when updating core state, there are two main problems that could
> pollute the SIS filter:
> 
>   - The updating is before task migration, so if dst_cpu is
>     selected to be propagated which might be fed with tasks
>     soon, the efforts we paid is no more than setting a busy
>     cpu into the SIS filter. While on the other hand it is
>     important that we update as early as possible to keep the
>     filter fresh, so it's not wise to delay the update to the
>     end of load balancing.
> 
>   - False negative propagation hurts performance since some
>     idle cpus could be out of reach. So in general we will
>     aggressively propagate idle cpus but allow false positive
>     continue to exist for a while, which may lead to filter
>     being fully polluted.
> 
> Pains can be relieved by a force correction when false positive
> continuously detected.
> 
> Signed-off-by: Abel Wu <wuyun.abel@...edance.com>
> ---
>  include/linux/sched/topology.h |  7 +++++
>  kernel/sched/fair.c            | 51 ++++++++++++++++++++++++++++++++--
>  2 files changed, 55 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index b93edf587d84..e3552ce192a9 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -91,6 +91,12 @@ struct sched_group;
>   *	search, and is also used as a fallback state of the other
>   *	states.
>   *
> + * - sd_may_idle
> + *	This state implies the unstableness of the SIS filter, and
> + *	some bits of it may out of date. This state is only used in
> + *	SMT domains as an intermediate state between sd_has_icpus
> + *	and sd_is_busy.
> + *
>   * - sd_is_busy
>   *	This state indicates there are no unoccupied cpus in this
>   *	domain. So for LLC domains, it gives the hint on whether
> @@ -111,6 +117,7 @@ struct sched_group;
>  enum sd_state {
>  	sd_has_icores,
>  	sd_has_icpus,
> +	sd_may_idle,
>  	sd_is_busy
>  };
>  
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d55fdcedf2c0..9713d183d35e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8768,6 +8768,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  
>  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
>  		struct rq *rq = cpu_rq(i);
> +		bool update = update_core && (env->dst_cpu != i);
>  
>  		sgs->group_load += cpu_load(rq);
>  		sgs->group_util += cpu_util_cfs(i);
> @@ -8777,7 +8778,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  		nr_running = rq->nr_running;
>  		sgs->sum_nr_running += nr_running;
>  
> -		if (update_core)
> +		/*
> +		 * The dst_cpu is not preferred since it might
> +		 * be fed with tasks soon.
> +		 */
> +		if (update)
maybe if (update_core && (env->dst_cpu != i)) so that the comment would
be near the code logic and meanwhile without introducing a update variable?
>  			sd_classify(sds, rq, i);
>  
>  		if (nr_running > 1)
> @@ -8801,7 +8806,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  			 * and fed with tasks, so we'd better choose
>  			 * a candidate in an opposite way.
>  			 */
> -			sds->idle_cpu = i;
> +			if (update)
> +				sds->idle_cpu = i;
>  			sgs->idle_cpus++;
>  
>  			/* Idle cpu can't have misfit task */
> @@ -9321,7 +9327,7 @@ static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>  {
>  	struct sched_domain_shared *sd_smt_shared = env->sd->shared;
>  	enum sd_state new = sds->sd_state;
> -	int this = env->dst_cpu;
> +	int icpu = sds->idle_cpu, this = env->dst_cpu;
>  
>  	/*
>  	 * Parallel updating can hardly contribute accuracy to
> @@ -9331,6 +9337,22 @@ static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>  	if (cmpxchg(&sd_smt_shared->updating, 0, 1))
>  		return;
>  
> +	/*
> +	 * The dst_cpu is likely to be fed with tasks soon.
> +	 * If it is the only unoccupied cpu in this domain,
> +	 * we still handle it the same way as as_has_icpus
> +	 * but turn the SMT into the unstable state, rather
> +	 * than waiting to the end of load balancing since
> +	 * it's also important that update the filter as
> +	 * early as possible to keep it fresh.
> +	 */
> +	if (new == sd_is_busy) {
> +		if (idle_cpu(this) || sched_idle_cpu(this)) {
available_idle_cpu()?

thanks,
Chenyu
> +			new = sd_may_idle;
> +			icpu = this;
> +		}
> +	}
> +
>  	/*
>  	 * There is at least one unoccupied cpu available, so
>  	 * propagate it to the filter to avoid false negative
> @@ -9338,6 +9360,12 @@ static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>  	 * idle cpus thus throughupt downgraded.
>  	 */
>  	if (new != sd_is_busy) {
> +		/*
> +		 * The sd_may_idle state is taken into
> +		 * consideration as well because from
> +		 * here we couldn't actually know task
> +		 * migrations would happen or not.
> +		 */
>  		if (!test_idle_cpus(this))
>  			set_idle_cpus(this, true);
>  	} else {
> @@ -9347,9 +9375,26 @@ static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>  		 */
>  		if (sd_smt_shared->state == sd_is_busy)
>  			goto out;
> +
> +		/*
> +		 * Allow false positive to exist for some time
> +		 * to make a tradeoff of accuracy of the filter
> +		 * for relieving cache traffic.
> +		 */
> +		if (sd_smt_shared->state == sd_has_icpus) {
> +			new = sd_may_idle;
> +			goto update;
> +		}
> +
> +		/*
> +		 * If the false positive issue has already been
> +		 * there for a while, a correction of the filter
> +		 * is needed.
> +		 */
>  	}
>  
>  	sd_update_icpus(this, sds->idle_cpu);
> +update:
>  	sd_smt_shared->state = new;
>  out:
>  	xchg(&sd_smt_shared->updating, 0);
> -- 
> 2.31.1
>