lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1504284108-6157-11-git-send-email-fweisbec@gmail.com>
Date:   Fri,  1 Sep 2017 18:41:45 +0200
From:   Frederic Weisbecker <fweisbec@...il.com>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Frederic Weisbecker <fweisbec@...il.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Chris Metcalf <cmetcalf@...lanox.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Luiz Capitulino <lcapitulino@...hat.com>,
        Christoph Lameter <cl@...ux.com>,
        "Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>,
        Ingo Molnar <mingo@...nel.org>, Mike Galbraith <efault@....de>,
        Rik van Riel <riel@...hat.com>,
        Wanpeng Li <kernellwp@...il.com>
Subject: [PATCH 10/12] housekeeping: Move isolcpus to housekeeping

We want to centralize the isolation features on the housekeeping
subsystem and scheduler domain isolation is a significant part of it.

No intended behaviour change, we just reuse the housekeeping cpumask
and core code.

Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
Cc: Chris Metcalf <cmetcalf@...lanox.com>
Cc: Rik van Riel <riel@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Mike Galbraith <efault@....de>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Christoph Lameter <cl@...ux.com>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Wanpeng Li <kernellwp@...il.com>
Cc: Luiz Capitulino <lcapitulino@...hat.com>
---
 drivers/base/cpu.c           | 11 ++++++++-
 include/linux/housekeeping.h |  1 +
 include/linux/sched.h        |  2 --
 kernel/cgroup/cpuset.c       | 14 ++++-------
 kernel/housekeeping.c        | 57 +++++++++++++++++++++++++++++++++++---------
 kernel/sched/core.c          | 16 +------------
 kernel/sched/topology.c      | 21 ++++------------
 7 files changed, 67 insertions(+), 55 deletions(-)

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2c3b359..8e33b9e 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -18,6 +18,7 @@
 #include <linux/cpufeature.h>
 #include <linux/tick.h>
 #include <linux/pm_qos.h>
+#include <linux/housekeeping.h>
 
 #include "base.h"
 
@@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
 	int n = 0, len = PAGE_SIZE-2;
+	cpumask_var_t isolated;
 
-	n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map));
+	if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_andnot(isolated, cpu_possible_mask,
+		       housekeeping_cpumask(HK_FLAG_DOMAIN));
+	n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated));
+
+	free_cpumask_var(isolated);
 
 	return n;
 }
diff --git a/include/linux/housekeeping.h b/include/linux/housekeeping.h
index 35fb197..a99466e 100644
--- a/include/linux/housekeeping.h
+++ b/include/linux/housekeeping.h
@@ -11,6 +11,7 @@ enum hk_flags {
 	HK_FLAG_MISC		= (1 << 2),
 	HK_FLAG_SCHED		= (1 << 3),
 	HK_FLAG_TICK		= (1 << 4),
+	HK_FLAG_DOMAIN		= (1 << 5),
 };
 
 #ifdef CONFIG_CPU_ISOLATION
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c28b182..816ff52 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -166,8 +166,6 @@ struct task_group;
 /* Task command name length: */
 #define TASK_COMM_LEN			16
 
-extern cpumask_var_t			cpu_isolated_map;
-
 extern void scheduler_tick(void);
 
 #define	MAX_SCHEDULE_TIMEOUT		LONG_MAX
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d51516..c65e73a 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -56,6 +56,7 @@
 #include <linux/time64.h>
 #include <linux/backing-dev.h>
 #include <linux/sort.h>
+#include <linux/housekeeping.h>
 
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
@@ -639,7 +640,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
 	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
-	cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -649,10 +649,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	dattr = NULL;
 	csa = NULL;
 
-	if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
-		goto done;
-	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
 		ndoms = 1;
@@ -666,7 +662,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
 		cpumask_and(doms[0], top_cpuset.effective_cpus,
-				     non_isolated_cpus);
+			    housekeeping_cpumask(HK_FLAG_DOMAIN));
 
 		goto done;
 	}
@@ -690,7 +686,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 		 */
 		if (!cpumask_empty(cp->cpus_allowed) &&
 		    !(is_sched_load_balance(cp) &&
-		      cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
+		      cpumask_intersects(cp->cpus_allowed,
+					 housekeeping_cpumask(HK_FLAG_DOMAIN))))
 			continue;
 
 		if (is_sched_load_balance(cp))
@@ -772,7 +769,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 
 			if (apn == b->pn) {
 				cpumask_or(dp, dp, b->effective_cpus);
-				cpumask_and(dp, dp, non_isolated_cpus);
+				cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
 				if (dattr)
 					update_domain_attr_tree(dattr + nslot, b);
 
@@ -785,7 +782,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	BUG_ON(nslot != ndoms);
 
 done:
-	free_cpumask_var(non_isolated_cpus);
 	kfree(csa);
 
 	/*
diff --git a/kernel/housekeeping.c b/kernel/housekeeping.c
index b2006bd..5f0e7ec 100644
--- a/kernel/housekeeping.c
+++ b/kernel/housekeeping.c
@@ -58,32 +58,67 @@ void __init housekeeping_init(void)
 	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static int __init housekeeping_nohz_full_setup(char *str)
+static int __init housekeeping_setup(char *str, enum hk_flags flags)
 {
 	cpumask_var_t non_housekeeping_mask;
 
 	alloc_bootmem_cpumask_var(&non_housekeeping_mask);
 	if (cpulist_parse(str, non_housekeeping_mask) < 0) {
-		pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
+		pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
 		free_bootmem_cpumask_var(non_housekeeping_mask);
 		return 0;
 	}
 
-	alloc_bootmem_cpumask_var(&housekeeping_mask);
-	cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask);
+	if (!housekeeping_flags) {
+		alloc_bootmem_cpumask_var(&housekeeping_mask);
+		cpumask_andnot(housekeeping_mask,
+			       cpu_possible_mask, non_housekeeping_mask);
+		if (cpumask_empty(housekeeping_mask))
+			cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+	} else {
+		cpumask_var_t tmp;
 
-	if (cpumask_empty(housekeeping_mask))
-		cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+		alloc_bootmem_cpumask_var(&tmp);
+		cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
+		if (!cpumask_equal(tmp, housekeeping_mask)) {
+			pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
+			free_bootmem_cpumask_var(tmp);
+			free_bootmem_cpumask_var(non_housekeeping_mask);
+			return 0;
+		}
+		free_bootmem_cpumask_var(tmp);
+	}
 
-	housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER |
-				HK_FLAG_RCU | HK_FLAG_MISC;
+	if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
+		if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+			tick_nohz_full_setup(non_housekeeping_mask);
+		} else {
+			pr_warn("Housekeeping: nohz unsupported."
+				" Build with CONFIG_NO_HZ_FULL\n");
+			free_bootmem_cpumask_var(non_housekeeping_mask);
+			return 0;
+		}
+	}
 
-	tick_nohz_full_setup(non_housekeeping_mask);
+	housekeeping_flags |= flags;
 
 	free_bootmem_cpumask_var(non_housekeeping_mask);
 
 	return 1;
 }
+
+static int __init housekeeping_nohz_full_setup(char *str)
+{
+	unsigned int flags;
+
+	flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
+
+	return housekeeping_setup(str, flags);
+}
 __setup("nohz_full=", housekeeping_nohz_full_setup);
-#endif
+
+static int __init housekeeping_isolcpus_setup(char *str)
+{
+	return housekeeping_setup(str, HK_FLAG_DOMAIN);
+}
+__setup("isolcpus=", housekeeping_isolcpus_setup);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 877c85d..de20aa1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,9 +84,6 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
-/* CPUs with isolated domains */
-cpumask_var_t cpu_isolated_map;
-
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
@@ -5672,10 +5669,6 @@ static inline void sched_init_smt(void) { }
 
 void __init sched_init_smp(void)
 {
-	cpumask_var_t non_isolated_cpus;
-
-	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-
 	sched_init_numa();
 
 	/*
@@ -5685,16 +5678,12 @@ void __init sched_init_smp(void)
 	 */
 	mutex_lock(&sched_domains_mutex);
 	sched_init_domains(cpu_active_mask);
-	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-	if (cpumask_empty(non_isolated_cpus))
-		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 
 	/* Move init over to a non-isolated CPU */
-	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
+	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
 		BUG();
 	sched_init_granularity();
-	free_cpumask_var(non_isolated_cpus);
 
 	init_sched_rt_class();
 	init_sched_dl_class();
@@ -5898,9 +5887,6 @@ void __init sched_init(void)
 	calc_load_update = jiffies + LOAD_FREQ;
 
 #ifdef CONFIG_SMP
-	/* May be allocated at isolcpus cmdline parse time */
-	if (cpu_isolated_map == NULL)
-		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
 	set_cpu_rq_start_time(smp_processor_id());
 #endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index bd8b6d6..1e03138 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,6 +3,7 @@
  */
 #include <linux/sched.h>
 #include <linux/mutex.h>
+#include <linux/housekeeping.h>
 
 #include "sched.h"
 
@@ -466,21 +467,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	update_top_cache_domain(cpu);
 }
 
-/* Setup the mask of CPUs configured for isolated domains */
-static int __init isolated_cpu_setup(char *str)
-{
-	int ret;
-
-	alloc_bootmem_cpumask_var(&cpu_isolated_map);
-	ret = cpulist_parse(str, cpu_isolated_map);
-	if (ret) {
-		pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
-		return 0;
-	}
-	return 1;
-}
-__setup("isolcpus=", isolated_cpu_setup);
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
@@ -1775,7 +1761,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
 	doms_cur = alloc_sched_domains(ndoms_cur);
 	if (!doms_cur)
 		doms_cur = &fallback_doms;
-	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
+	cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
 	err = build_sched_domains(doms_cur[0], NULL);
 	register_sched_domain_sysctl();
 
@@ -1871,7 +1857,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 	if (doms_new == NULL) {
 		n = 0;
 		doms_new = &fallback_doms;
-		cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
+		cpumask_and(doms_new[0], cpu_active_mask,
+			    housekeeping_cpumask(HK_FLAG_DOMAIN));
 		WARN_ON_ONCE(dattr_new);
 	}
 
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ