linux-kernel - Re: [PATCH 1/3] sched/fair: Introduce scaled capacity awareness in find_idlest

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 25 Sep 2017 19:51:01 -0700
From:   joelaf <joelaf@...gle.com>
To:     Rohit Jain <rohit.k.jain@...cle.com>, linux-kernel@...r.kernel.org,
        eas-dev@...ts.linaro.org
Cc:     peterz@...radead.org, mingo@...hat.com, atish.patra@...cle.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        morten.rasmussen@....com
Subject: Re: [PATCH 1/3] sched/fair: Introduce scaled capacity awareness in
 find_idlest_cpu code path

Hi Rohit,

Just some comments:

On 09/25/2017 05:02 PM, Rohit Jain wrote:
> While looking for idle CPUs for a waking task, we should also account
> for the delays caused due to the bandwidth reduction by RT/IRQ tasks.
> 
> This patch does that by trying to find a higher capacity CPU with
> minimum wake up latency.
> 
> 
> Signed-off-by: Rohit Jain <rohit.k.jain@...cle.com>
> ---
>  kernel/sched/fair.c | 27 ++++++++++++++++++++++++---
>  1 file changed, 24 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index eca6a57..afb701f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5590,6 +5590,11 @@ static unsigned long capacity_orig_of(int cpu)
>  	return cpu_rq(cpu)->cpu_capacity_orig;
>  }
>  
> +static inline bool full_capacity(int cpu)
> +{
> +	return (capacity_of(cpu) >= (capacity_orig_of(cpu)*819 >> 10));

Wouldn't 768 be a better multiplier? gcc can then convert the multiplication into shifts and adds.

> +}
> +
>  static unsigned long cpu_avg_load_per_task(int cpu)
>  {
>  	struct rq *rq = cpu_rq(cpu);
> @@ -5916,8 +5921,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
>  	unsigned long load, min_load = ULONG_MAX;
>  	unsigned int min_exit_latency = UINT_MAX;
>  	u64 latest_idle_timestamp = 0;
> +	unsigned int backup_cap = 0;
>  	int least_loaded_cpu = this_cpu;
>  	int shallowest_idle_cpu = -1;
> +	int shallowest_idle_cpu_backup = -1;
>  	int i;
>  
>  	/* Check if we have any choice: */
> @@ -5937,7 +5944,12 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
>  				 */
>  				min_exit_latency = idle->exit_latency;
>  				latest_idle_timestamp = rq->idle_stamp;
> -				shallowest_idle_cpu = i;
> +				if (full_capacity(i)) {
> +					shallowest_idle_cpu = i;
> +				} else if (capacity_of(i) > backup_cap) {
> +					shallowest_idle_cpu_backup = i;
> +					backup_cap = capacity_of(i);
> +				}

I'm a bit skeptical about this - if the CPU is idle, is it likely that the capacity of the CPU is reduced due to RT pressure? I can see that it can matter, but I am wondering if you have any data for your use case to show that it does (that is, if you didn't consider RT pressure for idle CPUs, are you still seeing a big enough performance improvement to warrant the change?)

>  			} else if ((!idle || idle->exit_latency == min_exit_latency) &&
>  				   rq->idle_stamp > latest_idle_timestamp) {
>  				/*
> @@ -5946,7 +5958,12 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
>  				 * a warmer cache.
>  				 */
>  				latest_idle_timestamp = rq->idle_stamp;
> -				shallowest_idle_cpu = i;
> +				if (full_capacity(i)) {
> +					shallowest_idle_cpu = i;
> +				} else if (capacity_of(i) > backup_cap) {
> +					shallowest_idle_cpu_backup = i;
> +					backup_cap = capacity_of(i);
> +				}
>  			}
>  		} else if (shallowest_idle_cpu == -1) {
>  			load = weighted_cpuload(cpu_rq(i));
> @@ -5957,7 +5974,11 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
>  		}
>  	}
>  
> -	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
> +	if (shallowest_idle_cpu != -1)
> +		return shallowest_idle_cpu;
> +
> +	return (shallowest_idle_cpu_backup != -1 ?
> +		shallowest_idle_cpu_backup : least_loaded_cpu);
>  }
>  
>  #ifdef CONFIG_SCHED_SMT
> 

I see code duplication here; factoring it out saves 7 lines compared to your original patch:

---
 kernel/sched/fair.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c95880e216f6..72fc8d18b251 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5528,6 +5528,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
 		if (idle_cpu(i)) {
+			int idle_candidate = -1;
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -5538,7 +5539,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 				 */
 				min_exit_latency = idle->exit_latency;
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
 			} else if ((!idle || idle->exit_latency == min_exit_latency) &&
 				   rq->idle_stamp > latest_idle_timestamp) {
 				/*
@@ -5547,7 +5548,16 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 				 * a warmer cache.
 				 */
 				latest_idle_timestamp = rq->idle_stamp;
-				shallowest_idle_cpu = i;
+				idle_candidate = i;
+			}
+
+			if (idle_candidate != -1) {
+				if (full_capacity(idle_candidate)) {
+					shallowest_idle_cpu = idle_candidate;
+				} else if (capacity_of(idle_candidate) > backup_cap) {
+					shallowest_idle_cpu_backup = idle_candidate;
+					backup_cap = capacity_of(idle_candidate);
+				}
 			}
 		} else if (shallowest_idle_cpu == -1) {
 			load = weighted_cpuload(i);
@@ -5558,7 +5568,11 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 		}
 	}
 
-	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+	if (shallowest_idle_cpu != -1)
+		return shallowest_idle_cpu;
+
+	return (shallowest_idle_cpu_backup != -1 ?
+			shallowest_idle_cpu_backup : least_loaded_cpu);
 }
 
 #ifdef CONFIG_SCHED_SMT
-- 
2.14.1.821.g8fa685d3b7-goog