Date:   Tue, 18 Feb 2020 14:54:14 +0000
From:   Valentin Schneider <valentin.schneider@....com>
To:     Vincent Guittot <vincent.guittot@...aro.org>, mingo@...hat.com,
        peterz@...radead.org, juri.lelli@...hat.com,
        dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
        mgorman@...e.de, linux-kernel@...r.kernel.org
Cc:     pauld@...hat.com, parth@...ux.ibm.com, hdanton@...a.com
Subject: Re: [PATCH v2 2/5] sched/numa: Replace runnable_load_avg by load_avg

On 2/14/20 3:27 PM, Vincent Guittot wrote:
> @@ -1473,38 +1473,35 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
>  	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
>  }
>  
> -static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
> -
> -static unsigned long cpu_runnable_load(struct rq *rq)
> -{
> -	return cfs_rq_runnable_load_avg(&rq->cfs);
> -}
> +/*
> + * 'numa_type' describes the node at the moment of load balancing.
> + */
> +enum numa_type {
> +	/* The node has spare capacity that can be used to run more tasks.  */
> +	node_has_spare = 0,
> +	/*
> +	 * The node is fully used and the tasks don't compete for more CPU
> +	 * cycles. Nevertheless, some tasks might wait before running.
> +	 */
> +	node_fully_busy,
> +	/*
> +	 * The node is overloaded and can't provide expected CPU cycles to all
> +	 * tasks.
> +	 */
> +	node_overloaded
> +};

Could we reuse group_type instead? The definitions are the same modulo
s/group/node/.
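
Something like the below is roughly what I have in mind - untested sketch,
assuming the existing group_type values map cleanly onto the three node
states and nothing NUMA-specific needs to be added to the enum later:

	struct numa_stats {
		unsigned long load;
		unsigned long util;
		/* Total compute capacity of CPUs on a node */
		unsigned long compute_capacity;
		unsigned int nr_running;
		unsigned int weight;
		/* reuse the load-balance classification instead of numa_type */
		enum group_type node_type;
	};

with numa_classify() then returning group_overloaded / group_has_spare /
group_fully_busy directly.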

>  
>  /* Cached statistics for all CPUs within a node */
>  struct numa_stats {
>  	unsigned long load;
> -
> +	unsigned long util;
>  	/* Total compute capacity of CPUs on a node */
>  	unsigned long compute_capacity;
> +	unsigned int nr_running;
> +	unsigned int weight;
> +	enum numa_type node_type;
>  };
>  
> -/*
> - * XXX borrowed from update_sg_lb_stats
> - */
> -static void update_numa_stats(struct numa_stats *ns, int nid)
> -{
> -	int cpu;
> -
> -	memset(ns, 0, sizeof(*ns));
> -	for_each_cpu(cpu, cpumask_of_node(nid)) {
> -		struct rq *rq = cpu_rq(cpu);
> -
> -		ns->load += cpu_runnable_load(rq);
> -		ns->compute_capacity += capacity_of(cpu);
> -	}
> -
> -}
> -
>  struct task_numa_env {
>  	struct task_struct *p;
>  
> @@ -1521,6 +1518,47 @@ struct task_numa_env {
>  	int best_cpu;
>  };
>  
> +static unsigned long cpu_load(struct rq *rq);
> +static unsigned long cpu_util(int cpu);
> +
> +static inline enum
> +numa_type numa_classify(unsigned int imbalance_pct,
> +			 struct numa_stats *ns)
> +{
> +	if ((ns->nr_running > ns->weight) &&
> +	    ((ns->compute_capacity * 100) < (ns->util * imbalance_pct)))
> +		return node_overloaded;
> +
> +	if ((ns->nr_running < ns->weight) ||
> +	    ((ns->compute_capacity * 100) > (ns->util * imbalance_pct)))
> +		return node_has_spare;
> +
> +	return node_fully_busy;
> +}
> +

As Mel pointed out, this is group_is_overloaded() and group_has_capacity().
@Mel, you mentioned having a common helper, do you have that lying around?
I haven't seen it in your reconciliation series.

What I'm naively thinking here is that we could either move the whole thing
over to sg_lb_stats (AFAICT the fields of numa_stats are a subset of it), or,
if we really care about the stack, tweak the field ordering to ensure we can
cast one into the other (not too enticed by that one though).
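
To make that a bit more concrete, a common helper could look something like
the below - purely illustrative, name and exact parameter list made up, just
to show that nothing in the classification itself is NUMA-specific:

	/*
	 * Hypothetical shared classifier: both update_sg_lb_stats() and
	 * update_numa_stats() would feed their per-CPU sums into this and
	 * get a group_type back.
	 */
	static inline enum group_type
	classify_capacity(unsigned int imbalance_pct, unsigned int nr_running,
			  unsigned int weight, unsigned long capacity,
			  unsigned long util)
	{
		if (nr_running > weight &&
		    capacity * 100 < util * imbalance_pct)
			return group_overloaded;

		if (nr_running < weight ||
		    capacity * 100 > util * imbalance_pct)
			return group_has_spare;

		return group_fully_busy;
	}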

> +/*
> + * XXX borrowed from update_sg_lb_stats
> + */
> +static void update_numa_stats(struct task_numa_env *env,
> +			      struct numa_stats *ns, int nid)
> +{
> +	int cpu;
> +
> +	memset(ns, 0, sizeof(*ns));
> +	for_each_cpu(cpu, cpumask_of_node(nid)) {
> +		struct rq *rq = cpu_rq(cpu);
> +
> +		ns->load += cpu_load(rq);
> +		ns->util += cpu_util(cpu);
> +		ns->nr_running += rq->cfs.h_nr_running;
> +		ns->compute_capacity += capacity_of(cpu);
> +	}
> +
> +	ns->weight = cpumask_weight(cpumask_of_node(nid));
> +
> +	ns->node_type = numa_classify(env->imbalance_pct, ns);
> +}
> +
>  static void task_numa_assign(struct task_numa_env *env,
>  			     struct task_struct *p, long imp)
>  {
