Date:   Tue, 21 Mar 2017 17:46:48 +0000
From:   Dietmar Eggemann <dietmar.eggemann@....com>
To:     Vincent Guittot <vincent.guittot@...aro.org>, peterz@...radead.org,
        mingo@...nel.org, linux-kernel@...r.kernel.org,
        ying.huang@...el.com
Subject: Re: [PATCH] sched/fair: Fix ftq noise bench regression

Hi Vincent,

On 17/03/17 13:47, Vincent Guittot wrote:

[...]

> Reported-by: ying.huang@...ux.intel.com
> Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

I thought I would be able to see a difference by running:

 perf stat --null --repeat 10 -- perf bench sched messaging -g 50 -l 5000

on an Ubuntu 16.10 server system.

Number of entries in the rq->leaf_cfs_rq_list per cpu: ~40

Target: Intel i5-3320M (4 logical CPUs)

tip/sched/core: 42.119140365 seconds time elapsed ( +-  0.33% )

+ patch       : 42.089557108 seconds time elapsed ( +-  0.37% )

Maybe I need a machine with more logical CPUs or a different test case.
In any case, I couldn't spot any regression.
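
FWIW, a count like the ~40 leaf cfs_rq entries mentioned above can be
obtained with a throwaway instrumentation along these lines (the helper
and the pr_info() are purely illustrative and not part of the patch):

static void count_leaf_cfs_rqs(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct cfs_rq *cfs_rq;
	struct rq_flags rf;
	unsigned int nr = 0;

	rq_lock_irqsave(rq, &rf);
	/* Walk the same list update_blocked_averages() iterates */
	for_each_leaf_cfs_rq(rq, cfs_rq)
		nr++;
	rq_unlock_irqrestore(rq, &rf);

	pr_info("cpu%d: %u entries on rq->leaf_cfs_rq_list\n", cpu, nr);
}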

> ---
>  kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2805bd7..007df59 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>  	return 1;
>  }
>  
> +/*
> + * Check if we need to update the load and the utilization of a blocked
> + * group_entity
> + */
> +static inline bool skip_blocked_update(struct sched_entity *se)
> +{
> +	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +
> +	/*
> +	 * If sched_entity still have not null load or utilization, we have to
> +	 * decay it.
> +	 */
> +	if (se->avg.load_avg || se->avg.util_avg)
> +		return false;
> +
> +	/*
> +	 * If there is a pending propagation, we have to update the load and
> +	 * the utilizaion of the sched_entity

nit pick: s/utilizaion/utilization

> +	 */
> +	if (gcfs_rq->propagate_avg)
> +		return false;
> +
> +	/*
> +	 * Other wise, the load and the utilization of the sched_entity is
> +	 * already null and there is no pending propagation so it will be a
> +	 * waste of time to try to decay it.
> +	 */
> +	return true;
> +}
> +
>  #else /* CONFIG_FAIR_GROUP_SCHED */
>  
>  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
>  	 * list_add_leaf_cfs_rq() for details.
>  	 */
>  	for_each_leaf_cfs_rq(rq, cfs_rq) {
> +		struct sched_entity *se;
> +
>  		/* throttled entities do not contribute to load */
>  		if (throttled_hierarchy(cfs_rq))
>  			continue;
> @@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
>  		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>  			update_tg_load_avg(cfs_rq, 0);
>  
> -		/* Propagate pending load changes to the parent */
> -		if (cfs_rq->tg->se[cpu])
> -			update_load_avg(cfs_rq->tg->se[cpu], 0);
> +		/* Propagate pending load changes to the parent if any */
> +		se = cfs_rq->tg->se[cpu];
> +		if (se && !skip_blocked_update(se))
> +			update_load_avg(se, 0);
>  	}
>  	rq_unlock_irqrestore(rq, &rf);
>  }
> 

Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
Saves a couple of patch lines:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 007df5953d1a..8001eeb4ec18 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3177,30 +3177,34 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
  * Check if we need to update the load and the utilization of a blocked
  * group_entity
  */
-static inline bool skip_blocked_update(struct sched_entity *se)
+static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
 {
-       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+       struct sched_entity *se = cfs_rq->tg->se[cpu];
+
+       /* cfs_rq of a root task_group has no sched_entity counterpart */
+       if (!se)
+               return false;
 
        /*
         * If sched_entity still have not null load or utilization, we have to
         * decay it.
         */
        if (se->avg.load_avg || se->avg.util_avg)
-               return false;
+               return true;
 
        /*
         * If there is a pending propagation, we have to update the load and
         * the utilizaion of the sched_entity
         */
-       if (gcfs_rq->propagate_avg)
-               return false;
+       if (cfs_rq->propagate_avg)
+               return true;
 
        /*
         * Other wise, the load and the utilization of the sched_entity is
         * already null and there is no pending propagation so it will be a
         * waste of time to try to decay it.
         */
-       return true;
+       return false;
 }
 
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -6991,8 +6995,6 @@ static void update_blocked_averages(int cpu)
         * list_add_leaf_cfs_rq() for details.
         */
        for_each_leaf_cfs_rq(rq, cfs_rq) {
-               struct sched_entity *se;
-
                /* throttled entities do not contribute to load */
                if (throttled_hierarchy(cfs_rq))
                        continue;
@@ -7001,9 +7003,8 @@ static void update_blocked_averages(int cpu)
                        update_tg_load_avg(cfs_rq, 0);
 
                /* Propagate pending load changes to the parent if any */
-               se = cfs_rq->tg->se[cpu];
-               if (se && !skip_blocked_update(se))
-                       update_load_avg(se, 0);
+               if (blocked_update_needed(cpu, cfs_rq))
+                       update_load_avg(cfs_rq->tg->se[cpu], 0);
        }
        rq_unlock_irqrestore(rq, &rf);
 }
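
Not tested, but just to show the net result, the helper and its call
site after this change would read roughly like this (the two hunks
above stitched together, comments paraphrased):

static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
{
	struct sched_entity *se = cfs_rq->tg->se[cpu];

	/* cfs_rq of a root task_group has no sched_entity counterpart */
	if (!se)
		return false;

	/* Load or utilization not yet null: it still has to be decayed */
	if (se->avg.load_avg || se->avg.util_avg)
		return true;

	/* A pending propagation also requires an update */
	if (cfs_rq->propagate_avg)
		return true;

	/* Nothing to decay and nothing pending: skip the update */
	return false;
}

and in update_blocked_averages():

		/* Propagate pending load changes to the parent if any */
		if (blocked_update_needed(cpu, cfs_rq))
			update_load_avg(cfs_rq->tg->se[cpu], 0);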

  

