linux-kernel - Re: [PATCH 2/2] sched/fair: Simple runqueue order on migrate

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <3a24ba8c-b00d-2f9b-95c9-b8aba1b51493@bytedance.com>
Date:   Tue, 29 Mar 2022 18:09:36 +0800
From:   Abel Wu <wuyun.abel@...edance.com>
To:     Tim Chen <tim.c.chen@...ux.intel.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Ingo Molnar <mingo@...e.hu>, Juri Lelli <juri.lelli@...hat.com>
Cc:     Yu Chen <yu.c.chen@...el.com>, Walter Mack <walter.mack@...el.com>,
        Mel Gorman <mgorman@...e.de>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] sched/fair: Simple runqueue order on migrate


On 3/26/22 6:54 AM, Tim Chen wrote:
> From: "Peter Zijlstra (Intel)" <peterz@...radead.org>
> 
> From: Peter Zijlstra (Intel) <peterz@...radead.org>
> 
> There are a number of problems with SMP migration of fair tasks, but
> basically it boils down to a task not receiving equal service on each
> runqueue (consider the trivial 3 tasks 2 cpus infeasible weight
> scenario).
> 
> Fully solving that with vruntime placement is 'hard', not least
> because a task might be very under-served on a busy runqueue and
> would need to be placed so far left on the new runqueue that it would
> significantly impact latency on the existing tasks.
> 
> Instead, do minimal / basic placement; when moving to a less
> busy queue place at the front of the queue to receive time sooner.
> When moving to a busier queue, place at the end of the queue to
> receive time later.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
> Tested-by: Chen Yu <yu.c.chen@...el.com>
> Tested-by: Walter Mack <walter.mack@...el.com>
> ---
>   kernel/sched/fair.c     | 33 +++++++++++++++++++++++++++++----
>   kernel/sched/features.h |  2 ++
>   2 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2498e97804fd..c5d2cb3a8f42 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4223,6 +4223,27 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
>   	se->vruntime = max_vruntime(se->vruntime, vruntime);
>   }
>   
> +static void place_entity_migrate(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +{
> +	if (!sched_feat(PLACE_MIGRATE))
> +		return;
> +
> +	if (cfs_rq->nr_running < se->migrated) {
> +		/*
> +		 * Migrated to a shorter runqueue, go first because
> +		 * we were under-served on the old runqueue.
> +		 */
> +		se->vruntime = cfs_rq->min_vruntime;
> +		return;
> +	}
> +
> +	/*
> +	 * Migrated to a longer runqueue, go last because
> +	 * we got over-served on the old runqueue.
> +	 */
> +	se->vruntime = cfs_rq->min_vruntime + sched_vslice(cfs_rq, se);
> +}

Should se->migrated be cleared after place_entity_migrate?

> +
>   static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
>   
>   static inline bool cfs_bandwidth_used(void);
> @@ -4296,6 +4317,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>   
>   	if (flags & ENQUEUE_WAKEUP)
>   		place_entity(cfs_rq, se, 0);
> +	else if (se->migrated)
> +		place_entity_migrate(cfs_rq, se);
>   
>   	check_schedstat_required();
>   	update_stats_enqueue_fair(cfs_rq, se, flags);
> @@ -6930,6 +6953,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
>    */
>   static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
>   {
> +	struct sched_entity *se = &p->se;
>   	/*
>   	 * As blocked tasks retain absolute vruntime the migration needs to
>   	 * deal with this by subtracting the old and adding the new
> @@ -6962,7 +6986,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
>   		 * rq->lock and can modify state directly.
>   		 */
>   		lockdep_assert_rq_held(task_rq(p));
> -		detach_entity_cfs_rq(&p->se);
> +		detach_entity_cfs_rq(se);
>   
>   	} else {
>   		/*
> @@ -6973,14 +6997,15 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
>   		 * wakee task is less decayed, but giving the wakee more load
>   		 * sounds not bad.
>   		 */
> -		remove_entity_load_avg(&p->se);
> +		remove_entity_load_avg(se);
>   	}
>   
>   	/* Tell new CPU we are migrated */
> -	p->se.avg.last_update_time = 0;
> +	se->avg.last_update_time = 0;
>   
>   	/* We have migrated, no longer consider this task hot */
> -	p->se.migrated = 1;
> +	for_each_sched_entity(se)
> +		se->migrated = READ_ONCE(cfs_rq_of(se)->nr_running) + !se->on_rq;
>   
>   	update_scan_period(p, new_cpu);
>   }
> diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> index 1cf435bbcd9c..681c84fd062c 100644
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -100,3 +100,5 @@ SCHED_FEAT(LATENCY_WARN, false)
>   
>   SCHED_FEAT(ALT_PERIOD, true)
>   SCHED_FEAT(BASE_SLICE, true)
> +
> +SCHED_FEAT(PLACE_MIGRATE, true)