linux-kernel - Re: [PATCH] sched: push force idled core

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ab6e95da-2e17-5b17-9e3c-ba71aa951da0@linux.alibaba.com>
Date:   Fri, 24 Mar 2023 11:37:26 +0800
From:   cruzzhao <cruzzhao@...ux.alibaba.com>
To:     mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, vschneid@...hat.com
Cc:     linux-kernel@...r.kernel.org
Subject: Re: [PATCH] sched: push force idled core_pick task to another cpu

ping...
As the core pick task is the max priority task, if it's force
idled, it's better to find a suitable idle cpu for it to run on
rather than waiting for other cpus to steal it. BTW, there's
no chance for uncookie'd tasks to be stolen.

Consider the following scenario:
Task A is cookie'd a, task B1 and B2 are cookie'd b.
A and B1 are running on core1, and B2 is running on core2
with sibling idle.
There's no chance for B1 to migrate to ht1 of core2 (the idle
sibling of B2) immediately, which causes a lot of force idle.

	core1				core2
ht0		ht1		ht0		ht1
A		force idle	B2		real idle
force idle	B1		B2		real idle
A		force idle	B2		real idle
force idle	B1		B2		real idle

After applying this patch, B1 will be pushed immediately, and
force idle will decrease.

在 2023/3/6 下午8:41, Cruz Zhao 写道:
> When a task with the max priority of its rq is force
> idled because of an unmatched cookie, we'd better find
> a suitable cpu for it to run on as soon as possible, one
> that is idle and cookie matched. In order to achieve this
> goal, we push the task in sched_core_balance(), after
> steal_cookie_task().
> 
> Signed-off-by: Cruz Zhao <CruzZhao@...ux.alibaba.com>
> ---
>  kernel/sched/core.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  kernel/sched/sched.h |  1 +
>  2 files changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a3f5147..2a2005a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -246,6 +246,8 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
>  {
>  	rq->core->core_task_seq++;
>  
> +	if (p == rq->force_idled_core_pick)
> +		rq->force_idled_core_pick = NULL;
>  	if (sched_core_enqueued(p)) {
>  		rb_erase(&p->core_node, &rq->core_tree);
>  		RB_CLEAR_NODE(&p->core_node);
> @@ -346,9 +348,10 @@ static void __sched_core_flip(bool enabled)
>  
>  		sched_core_lock(cpu, &flags);
>  
> -		for_each_cpu(t, smt_mask)
> +		for_each_cpu(t, smt_mask) {
>  			cpu_rq(t)->core_enabled = enabled;
> -
> +			cpu_rq(t)->force_idled_core_pick = NULL;
> +		}
>  		cpu_rq(cpu)->core->core_forceidle_start = 0;
>  
>  		sched_core_unlock(cpu, &flags);
> @@ -6085,6 +6088,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
>  		next = pick_task(rq);
>  		if (!next->core_cookie) {
>  			rq->core_pick = NULL;
> +			rq->force_idled_core_pick = NULL;
>  			/*
>  			 * For robustness, update the min_vruntime_fi for
>  			 * unconstrained picks as well.
> @@ -6135,6 +6139,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
>  				p = idle_sched_class.pick_task(rq_i);
>  		}
>  
> +		if (p != rq_i->core_pick)
> +			rq_i->force_idled_core_pick = rq_i->core_pick;
>  		rq_i->core_pick = p;
>  
>  		if (p == rq_i->idle) {
> @@ -6288,10 +6294,61 @@ static bool steal_cookie_task(int cpu, struct sched_domain *sd)
>  	return false;
>  }
>  
> +static bool try_push_unmatch_task(struct task_struct *p, int this, int that)
> +{
> +	struct rq *src = cpu_rq(this), *dst = cpu_rq(that);
> +	bool success = false;
> +
> +	local_irq_disable();
> +	double_rq_lock(src, dst);
> +	if (!available_idle_cpu(that))
> +		goto unlock;
> +	if (!cpumask_test_cpu(that, &p->cpus_mask))
> +		goto unlock;
> +	if (!sched_cpu_cookie_match(dst, p))
> +		goto unlock;
> +	if (p->core_occupation > dst->idle->core_occupation)
> +		goto unlock;
> +
> +	deactivate_task(src, p, 0);
> +	set_task_cpu(p, that);
> +	src->force_idled_core_pick = NULL;
> +	activate_task(dst, p, 0);
> +
> +	resched_curr(dst);
> +
> +	success = true;
> +unlock:
> +	double_rq_unlock(src, dst);
> +	local_irq_enable();
> +
> +	return success;
> +}
> +
> +static bool push_unmatch_task(struct task_struct *p, int cpu, struct sched_domain *sd)
> +{
> +	int i;
> +	struct cpumask mask;
> +
> +	cpumask_and(&mask, sched_domain_span(sd), &p->cpus_mask);
> +	for_each_cpu_wrap(i, &mask, cpu) {
> +		if (i == cpu)
> +			continue;
> +
> +		if (need_resched())
> +			break;
> +
> +		if (try_push_unmatch_task(p, cpu, i))
> +			return true;
> +	}
> +	return false;
> +}
> +
>  static void sched_core_balance(struct rq *rq)
>  {
>  	struct sched_domain *sd;
>  	int cpu = cpu_of(rq);
> +	struct task_struct *p;
>  
>  	preempt_disable();
>  	rcu_read_lock();
> @@ -6303,6 +6360,18 @@ static void sched_core_balance(struct rq *rq)
>  		if (steal_cookie_task(cpu, sd))
>  			break;
>  	}
> +
> +	p = rq->force_idled_core_pick;	
> +	if (!p || p == rq->idle)
> +		goto unlock;
> +	for_each_domain(cpu, sd) {
> +		if (need_resched())
> +			break;
> +
> +		if (push_unmatch_task(p, cpu, sd))
> +			break;
> +	}
> +unlock:
>  	raw_spin_rq_lock_irq(rq);
>  	rcu_read_unlock();
>  	preempt_enable();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 3e8df6d..f9e7988 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1140,6 +1140,7 @@ struct rq {
>  	/* per rq */
>  	struct rq		*core;
>  	struct task_struct	*core_pick;
> +	struct task_struct	*force_idled_core_pick;
>  	unsigned int		core_enabled;
>  	unsigned int		core_sched_seq;
>  	struct rb_root		core_tree;