linux-kernel - Re: [PATCH 00/14] sched/topology fixes

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20170502144356.GB3377@worktop.programming.kicks-ass.net>
Date:   Tue, 2 May 2017 16:43:56 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     mingo@...nel.org, lvenanci@...hat.com
Cc:     lwang@...hat.com, riel@...hat.com, efault@....de,
        tglx@...utronix.de, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 00/14] sched/topology fixes

On Fri, Apr 28, 2017 at 03:53:39PM +0200, Peter Zijlstra wrote:
> Also, the following occurred to me:
> 
>   (sg_span & sg_mask) == sg_mask
> 
> Therefore, we don't need to do the whole "sg_span &" business.
> 
> Hmm?

> @@ -856,7 +857,7 @@ build_sched_groups(struct sched_domain *
>  			continue;
>  
>  		group = get_group(i, sdd, &sg);
> -		cpumask_setall(sched_group_mask(sg));
> +		cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
>  
>  		for_each_cpu(j, span) {
>  			if (get_group(j, sdd, NULL) != group)

OK, so this explodes mightily.

That code also hurt my brain badly, so I had to clean it up a little.

The below seems to boot.

---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7996,7 +7996,7 @@ static int active_load_balance_cpu_stop(
 static int should_we_balance(struct lb_env *env)
 {
 	struct sched_group *sg = env->sd->groups;
-	struct cpumask *sg_cpus, *sg_mask;
+	struct cpumask *sg_mask;
 	int cpu, balance_cpu = -1;
 
 	/*
@@ -8006,11 +8006,10 @@ static int should_we_balance(struct lb_e
 	if (env->idle == CPU_NEWLY_IDLE)
 		return 1;
 
-	sg_cpus = sched_group_cpus(sg);
 	sg_mask = sched_group_mask(sg);
 	/* Try to find first idle cpu */
-	for_each_cpu_and(cpu, sg_cpus, env->cpus) {
-		if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+	for_each_cpu_and(cpu, sg_mask, env->cpus) {
+		if (!idle_cpu(cpu))
 			continue;
 
 		balance_cpu = cpu;
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -85,7 +85,8 @@ static int sched_domain_debug_one(struct
 				group->sgc->id,
 				cpumask_pr_args(sched_group_cpus(group)));
 
-		if ((sd->flags & SD_OVERLAP) && !cpumask_full(sched_group_mask(group))) {
+		if ((sd->flags & SD_OVERLAP) &&
+		    !cpumask_equal(sched_group_mask(group), sched_group_cpus(group))) {
 			printk(KERN_CONT " mask=%*pbl",
 				cpumask_pr_args(sched_group_mask(group)));
 		}
@@ -505,7 +506,7 @@ enum s_alloc {
  */
 int group_balance_cpu(struct sched_group *sg)
 {
-	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+	return cpumask_first(sched_group_mask(sg));
 }
 
 
@@ -833,23 +834,34 @@ build_overlap_sched_groups(struct sched_
  * [*] in other words, the first group of each domain is its child domain.
  */
 
-static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
+static struct sched_group *get_group(int cpu, struct sd_data *sdd)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	struct sched_domain *child = sd->child;
+	struct sched_group *sg;
 
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg) {
-		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+	sg = *per_cpu_ptr(sdd->sg, cpu);
+	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
 
-		/* For claim_allocations: */
-		atomic_set(&(*sg)->sgc->ref, 1);
+	/* For claim_allocations: */
+	atomic_inc(&sg->ref);
+	atomic_inc(&sg->sgc->ref);
+
+	if (child) {
+		cpumask_copy(sched_group_cpus(sg), sched_domain_span(child));
+		cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
+	} else {
+		cpumask_set_cpu(cpu, sched_group_cpus(sg));
+		cpumask_set_cpu(cpu, sched_group_cpus(sg));
 	}
 
-	return cpu;
+	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_cpus(sg));
+	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+
+	return sg;
 }
 
 /*
@@ -868,34 +880,20 @@ build_sched_groups(struct sched_domain *
 	struct cpumask *covered;
 	int i;
 
-	get_group(cpu, sdd, &sd->groups);
-	atomic_inc(&sd->groups->ref);
-
-	if (cpu != cpumask_first(span))
-		return 0;
-
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
 	cpumask_clear(covered);
 
-	for_each_cpu(i, span) {
+	for_each_cpu_wrap(i, span, cpu) {
 		struct sched_group *sg;
-		int group, j;
 
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		group = get_group(i, sdd, &sg);
-		cpumask_setall(sched_group_mask(sg));
+		sg = get_group(i, sdd);
 
-		for_each_cpu(j, span) {
-			if (get_group(j, sdd, NULL) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
+		cpumask_or(covered, covered, sched_group_cpus(sg));
 
 		if (!first)
 			first = sg;
@@ -904,6 +902,7 @@ build_sched_groups(struct sched_domain *
 		last = sg;
 	}
 	last->next = first;
+	sd->groups = first;
 
 	return 0;
 }