Message-ID: <20071003062240.GA19027@elte.hu>
Date: Wed, 3 Oct 2007 08:22:40 +0200
From: Ingo Molnar <mingo@...e.hu>
To: Nick Piggin <nickpiggin@...oo.com.au>
Cc: Paul Jackson <pj@....com>, akpm@...ux-foundation.org,
menage@...gle.com, linux-kernel@...r.kernel.org, dino@...ibm.com,
cpw@....com
Subject: [patch] sched: fix sched-domains partitioning by cpusets
* Nick Piggin <nickpiggin@...oo.com.au> wrote:
> BTW. as far as the sched.c changes in your patch go, I much prefer the
> partition_sched_domains API: http://lkml.org/lkml/2006/10/19/85
>
> The caller should manage everything itself, rather than
> partition_sched_domains doing half of the memory allocation.
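
(For reference, the API change in question as it ends up in the patch below -
the old prototype takes two partitions and builds domains for each non-empty
one internally, the new one takes a single partition per call and leaves the
iteration to the caller:)

	/* old */
	extern int partition_sched_domains(cpumask_t *partition1,
					   cpumask_t *partition2);
	/* new */
	extern int partition_sched_domains(cpumask_t *partition);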
i've merged your patch into my scheduler queue - see the patch below. (And
could you send me your SoB line too?) Paul, if we went with this approach,
what else would be needed for your purposes?
Ingo
--------------------------------->
Subject: sched: fix sched-domains partitioning by cpusets
From: Nick Piggin <nickpiggin@...oo.com.au>
Fix sched-domains partitioning by cpusets: instead of incrementally updating
two partitions at a time, walk the whole cpuset tree whenever a cpu_exclusive
cpuset is created, removed or has its cpus_allowed changed, and recreate all
partitions from scratch.
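
In outline, the new flow looks like the following (a standalone userspace
model for illustration only; plain unsigned long bitmasks, a fixed four-child
array and printf() stand in for cpumask_t, the sibling list and the real
domain rebuild):

#include <stdio.h>

/* toy stand-ins for the kernel types/ops, for illustration only */
struct cpuset {
	unsigned long cpus_allowed;	/* bitmask: bit N = CPU N */
	int cpu_exclusive;
	struct cpuset *children[4];
	int nr_children;
};

/* stand-in for the new single-mask partition_sched_domains() */
static int partition_sched_domains(unsigned long *partition)
{
	printf("rebuild sched domains for mask 0x%lx\n", *partition);
	return 0;			/* 0 == success */
}

/* depth-first walk: every cpu_exclusive cpuset becomes its own
 * partition; whatever it claims is removed from *non_partitioned */
static void update_cpu_domains_children(struct cpuset *par,
					unsigned long *non_partitioned)
{
	for (int i = 0; i < par->nr_children; i++) {
		struct cpuset *c = par->children[i];

		if (!c->cpus_allowed)
			continue;
		if (c->cpu_exclusive) {
			if (!partition_sched_domains(&c->cpus_allowed))
				*non_partitioned &= ~c->cpus_allowed;
		} else {
			update_cpu_domains_children(c, non_partitioned);
		}
	}
}

int main(void)
{
	struct cpuset a = { .cpus_allowed = 0x03, .cpu_exclusive = 1 };
	struct cpuset b = { .cpus_allowed = 0x0c, .cpu_exclusive = 1 };
	struct cpuset top = { .cpus_allowed = 0xff,
			      .children = { &a, &b }, .nr_children = 2 };
	unsigned long rest = top.cpus_allowed;

	update_cpu_domains_children(&top, &rest);
	/* everything not claimed by an exclusive cpuset (CPUs 4-7
	 * here) ends up in one final catch-all partition */
	return partition_sched_domains(&rest);
}

Running this prints one mask per exclusive cpuset (0x3, 0xc) followed by the
catch-all partition (0xf0), which is exactly the order in which the patched
update_cpu_domains() issues its partition_sched_domains() calls.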
Signed-off-by: Ingo Molnar <mingo@...e.hu>
---
 include/linux/cpuset.h |    2 
 include/linux/sched.h  |    3 -
 kernel/cpuset.c        |  109 ++++++++++++++++++++++---------------------------
 kernel/sched.c         |   31 +++++++------
 4 files changed, 70 insertions(+), 75 deletions(-)
Index: linux/include/linux/cpuset.h
===================================================================
--- linux.orig/include/linux/cpuset.h
+++ linux/include/linux/cpuset.h
@@ -14,6 +14,8 @@

 #ifdef CONFIG_CPUSETS

+extern int cpuset_hotplug_update_sched_domains(void);
+
 extern int number_of_cpusets;	/* How many cpusets are defined in system? */

 extern int cpuset_init_early(void);
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -798,8 +798,7 @@ struct sched_domain {
 #endif
 };

-extern int partition_sched_domains(cpumask_t *partition1,
-				   cpumask_t *partition2);
+extern int partition_sched_domains(cpumask_t *partition);

 #endif	/* CONFIG_SMP */
Index: linux/kernel/cpuset.c
===================================================================
--- linux.orig/kernel/cpuset.c
+++ linux/kernel/cpuset.c
@@ -752,6 +752,24 @@ static int validate_change(const struct
 	return 0;
 }

+static void update_cpu_domains_children(struct cpuset *par,
+					cpumask_t *non_partitioned)
+{
+	struct cpuset *c;
+
+	list_for_each_entry(c, &par->children, sibling) {
+		if (cpus_empty(c->cpus_allowed))
+			continue;
+		if (is_cpu_exclusive(c)) {
+			if (!partition_sched_domains(&c->cpus_allowed)) {
+				cpus_andnot(*non_partitioned,
+					*non_partitioned, c->cpus_allowed);
+			}
+		} else
+			update_cpu_domains_children(c, non_partitioned);
+	}
+}
+
 /*
  * For a given cpuset cur, partition the system as follows
  * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
@@ -761,53 +779,38 @@ static int validate_change(const struct
  * Build these two partitions by calling partition_sched_domains
  *
  * Call with manage_mutex held.  May nest a call to the
- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
- * Must not be called holding callback_mutex, because we must
- * not call lock_cpu_hotplug() while holding callback_mutex.
+ * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. Must not be called holding
+ * callback_mutex, because we must not call lock_cpu_hotplug() while holding
+ * callback_mutex.
  */

-static void update_cpu_domains(struct cpuset *cur)
+static void update_cpu_domains(void)
 {
-	struct cpuset *c, *par = cur->parent;
-	cpumask_t pspan, cspan;
-
-	if (par == NULL || cpus_empty(cur->cpus_allowed))
-		return;
+	cpumask_t non_partitioned;

-	/*
-	 * Get all cpus from parent's cpus_allowed not part of exclusive
-	 * children
-	 */
-	pspan = par->cpus_allowed;
-	list_for_each_entry(c, &par->children, sibling) {
-		if (is_cpu_exclusive(c))
-			cpus_andnot(pspan, pspan, c->cpus_allowed);
-	}
-	if (!is_cpu_exclusive(cur)) {
-		cpus_or(pspan, pspan, cur->cpus_allowed);
-		if (cpus_equal(pspan, cur->cpus_allowed))
-			return;
-		cspan = CPU_MASK_NONE;
-	} else {
-		if (cpus_empty(pspan))
-			return;
-		cspan = cur->cpus_allowed;
-		/*
-		 * Get all cpus from current cpuset's cpus_allowed not part
-		 * of exclusive children
-		 */
-		list_for_each_entry(c, &cur->children, sibling) {
-			if (is_cpu_exclusive(c))
-				cpus_andnot(cspan, cspan, c->cpus_allowed);
-		}
-	}
+	BUG_ON(!mutex_is_locked(&manage_mutex));

 	lock_cpu_hotplug();
-	partition_sched_domains(&pspan, &cspan);
+	non_partitioned = top_cpuset.cpus_allowed;
+	update_cpu_domains_children(&top_cpuset, &non_partitioned);
+	partition_sched_domains(&non_partitioned);
 	unlock_cpu_hotplug();
 }

 /*
+ * Same as above except called with lock_cpu_hotplug and without manage_mutex.
+ */
+
+int cpuset_hotplug_update_sched_domains(void)
+{
+	cpumask_t non_partitioned;
+
+	non_partitioned = top_cpuset.cpus_allowed;
+	update_cpu_domains_children(&top_cpuset, &non_partitioned);
+	return partition_sched_domains(&non_partitioned);
+}
+
+/*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
@@ -845,8 +848,8 @@ static int update_cpumask(struct cpuset

 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
-		update_cpu_domains(cs);
+	if (!cpus_unchanged)
+		update_cpu_domains();
 	return 0;
 }
@@ -1087,7 +1090,7 @@ static int update_flag(cpuset_flagbits_t
 	mutex_unlock(&callback_mutex);

 	if (cpu_exclusive_changed)
-		update_cpu_domains(cs);
+		update_cpu_domains();
 	return 0;
 }
@@ -1947,19 +1950,9 @@ static int cpuset_mkdir(struct inode *di
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }

-/*
- * Locking note on the strange update_flag() call below:
- *
- * If the cpuset being removed is marked cpu_exclusive, then simulate
- * turning cpu_exclusive off, which will call update_cpu_domains().
- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
- * made while holding callback_mutex.  Elsewhere the kernel nests
- * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
- * nesting would risk an ABBA deadlock.
- */
-
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
+	int is_exclusive;
 	struct cpuset *cs = dentry->d_fsdata;
 	struct dentry *d;
 	struct cpuset *parent;
@@ -1977,13 +1970,8 @@ static int cpuset_rmdir(struct inode *un
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
-	if (is_cpu_exclusive(cs)) {
-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
-		if (retval < 0) {
-			mutex_unlock(&manage_mutex);
-			return retval;
-		}
-	}
+	is_exclusive = is_cpu_exclusive(cs);
+
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
@@ -1998,8 +1986,13 @@ static int cpuset_rmdir(struct inode *un
 	mutex_unlock(&callback_mutex);
 	if (list_empty(&parent->children))
 		check_for_release(parent, &pathbuf);
+
+	if (is_exclusive)
+		update_cpu_domains();
+
 	mutex_unlock(&manage_mutex);
 	cpuset_release_agent(pathbuf);
+
 	return 0;
 }
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -6274,6 +6274,9 @@ error:
  */
 static int arch_init_sched_domains(const cpumask_t *cpu_map)
 {
+#ifdef CONFIG_CPUSETS
+	return cpuset_hotplug_update_sched_domains();
+#else
 	cpumask_t cpu_default_map;
 	int err;

@@ -6287,6 +6290,7 @@ static int arch_init_sched_domains(const
 	err = build_sched_domains(&cpu_default_map);

 	return err;
+#endif
 }

 static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6310,29 +6314,26 @@ static void detach_destroy_domains(const

 /*
  * Partition sched domains as specified by the cpumasks below.
- * This attaches all cpus from the cpumasks to the NULL domain,
+ * This attaches all cpus from the partition to the NULL domain,
  * waits for a RCU quiescent period, recalculates sched
- * domain information and then attaches them back to the
- * correct sched domains
- * Call with hotplug lock held
+ * domain information and then attaches them back to their own
+ * isolated partition.
+ *
+ * Called with hotplug lock held
+ *
+ * Returns 0 on success.
  */
-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
+int partition_sched_domains(cpumask_t *partition)
 {
+	cpumask_t non_isolated_cpus;
 	cpumask_t change_map;
-	int err = 0;

-	cpus_and(*partition1, *partition1, cpu_online_map);
-	cpus_and(*partition2, *partition2, cpu_online_map);
-	cpus_or(change_map, *partition1, *partition2);
+	cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
+	cpus_and(change_map, *partition, non_isolated_cpus);

 	/* Detach sched domains from all of the affected cpus */
 	detach_destroy_domains(&change_map);
-	if (!cpus_empty(*partition1))
-		err = build_sched_domains(partition1);
-	if (!err && !cpus_empty(*partition2))
-		err = build_sched_domains(partition2);
-
-	return err;
+	return build_sched_domains(&change_map);
 }

 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
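
To make the new partition_sched_domains() semantics concrete, here is the
mask arithmetic on a hypothetical 8-CPU box with CPU 7 isolated via isolcpus=
(plain bitmask notation instead of cpumask_t, for brevity; the values are
made up, not from the patch):

	cpu_online_map   == 0xff	/* CPUs 0-7 online */
	cpu_isolated_map == 0x80	/* CPU 7 isolated (isolcpus=7) */
	*partition       == 0xf0	/* caller asks for CPUs 4-7 */

	non_isolated_cpus = cpu_online_map & ~cpu_isolated_map;	/* == 0x7f */
	change_map        = *partition & non_isolated_cpus;	/* == 0x70 */

detach_destroy_domains() and build_sched_domains() then operate on CPUs 4-6
only - the isolated CPU 7 is neither detached nor given a new domain, no
matter which partition names it.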