lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 16 Jul 2012 17:16:39 +0800
From:	Michael Wang <wangyun@...ux.vnet.ibm.com>
To:	LKML <linux-kernel@...r.kernel.org>
CC:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...nel.org>
Subject: [RFC PATCH] sched: dynamically schedule domain configuration

From: Michael Wang <wangyun@...ux.vnet.ibm.com>

This patch tries to provide a way for the user to dynamically change
the behaviour of load balancing by setting flags of the schedule domains.

Currently it relies on the cpu cgroup and only SD_LOAD_BALANCE is
implemented. Usage:

1. /sys/fs/cgroup/domain/domain.config_level
	the default config_level is 0, which means we currently configure
	the sibling domain for all cpus; we can use:
		echo 'number' > /sys/fs/cgroup/domain/domain.config_level
	to change the level.

2. /sys/fs/cgroup/domain/domain.topology
	this shows the SD_LOAD_BALANCE status of every domain level of
	every cpu; we can use:
		cat /sys/fs/cgroup/domain/domain.topology

3. /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	this changes the SD_LOAD_BALANCE bit in the flags of the schedule
	domains at level 'config_level'; we can use:
		echo 1 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	to enable this bit, and:
		echo 0 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
	to disable it.

It may not work well yet (it may not even work at all, as I can't see any
changes on my server even after disabling SD_LOAD_BALANCE on all domains),
but it is interesting and should appeal to people who want a way to
'kill' load balancing by their own hands, if we can implement it.

Comments and questions are very welcome ;-)

Signed-off-by: Michael Wang <wangyun@...ux.vnet.ibm.com>
---
 include/linux/cgroup_subsys.h |    1 +
 kernel/sched/core.c           |  143 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 0 deletions(-)

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390c..25eb842 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -21,6 +21,7 @@ SUBSYS(debug)
 
 #ifdef CONFIG_CGROUP_SCHED
 SUBSYS(cpu_cgroup)
+SUBSYS(domain_cgroup)
 #endif
 
 /* */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3987b9d..544bf78 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8423,6 +8423,149 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.early_init	= 1,
 };
 
+static struct cgroup_subsys_state domain_cgroup_css;
+static struct cgroup_subsys_state *domain_cgroup_create(struct cgroup *cgrp)
+{
+	if (!cgrp->parent) {
+		/* This is early initialization for the top cgroup */
+		return &domain_cgroup_css;
+	}
+
+	return ERR_PTR(-EPERM);
+}
+
+static void domain_cgroup_destroy(struct cgroup *cgrp)
+{
+	return;
+}
+
+static int domain_cgroup_can_attach(struct cgroup *cgrp,
+				    struct cgroup_taskset *tset)
+{
+	return -EINVAL;
+}
+
+static void domain_cgroup_attach(struct cgroup *cgrp,
+				 struct cgroup_taskset *tset)
+{
+	return;
+}
+
+static void domain_cgroup_exit(struct cgroup *cgrp,
+			       struct cgroup *old_cgrp,
+			       struct task_struct *task)
+{
+	return;
+}
+
+static int domain_config_level;
+
+static int domain_cl_write_u64(struct cgroup *cgrp,
+			       struct cftype *cftype,
+			       u64 shareval)
+{
+	domain_config_level = shareval;
+	return 0;
+}
+
+static u64 domain_cl_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return (u64)domain_config_level;
+}
+
+static int domain_slb_write_u64(struct cgroup *cgrp,
+				struct cftype *cftype,
+				u64 shareval)
+{
+	int cpu;
+	struct sched_domain *sd;
+	if (shareval != 0 && shareval != 1)
+		return -EINVAL;
+
+	mutex_lock(&sched_domains_mutex);
+	for_each_cpu(cpu, cpu_active_mask) {
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+			if (sd->level == domain_config_level) {
+				if (shareval)
+					sd->flags |= SD_LOAD_BALANCE;
+				else
+					sd->flags &= ~SD_LOAD_BALANCE;
+			}
+		}
+	}
+	mutex_unlock(&sched_domains_mutex);
+	return 0;
+}
+
+static u64 domain_slb_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	int cpu, ret = 0;
+	struct sched_domain *sd;
+	mutex_lock(&sched_domains_mutex);
+	for_each_cpu(cpu, cpu_active_mask) {
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+			if (sd->level == domain_config_level) {
+				if (sd->flags & SD_LOAD_BALANCE)
+					ret = 1;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&sched_domains_mutex);
+	return ret;
+}
+
+static int domain_topology_show(struct cgroup *cgrp,
+				struct cftype *cft,
+				struct cgroup_map_cb *cb)
+{
+	int cpu;
+	struct sched_domain *sd;
+	mutex_lock(&sched_domains_mutex);
+	for_each_cpu(cpu, cpu_active_mask) {
+		cb->fill(cb, "cpu", cpu);
+		for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+			cb->fill(cb, "\tlevel", sd->level);
+			cb->fill(cb, "\t\tSD_LOAD_BALANCE",
+						sd->flags & SD_LOAD_BALANCE);
+		}
+	}
+	mutex_unlock(&sched_domains_mutex);
+
+	return 0;
+}
+
+static struct cftype domain_files[] = {
+	{
+		.name = "config_level",
+		.read_u64 = domain_cl_read_u64,
+		.write_u64 = domain_cl_write_u64,
+	},
+	{
+		.name = "SD_LOAD_BALANCE",
+		.read_u64 = domain_slb_read_u64,
+		.write_u64 = domain_slb_write_u64,
+	},
+	{
+		.name = "topology",
+		.read_map = domain_topology_show,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys domain_cgroup_subsys = {
+	.name		= "domain",
+	.create		= domain_cgroup_create,
+	.destroy	= domain_cgroup_destroy,
+	.can_attach	= domain_cgroup_can_attach,
+	.attach		= domain_cgroup_attach,
+	.exit		= domain_cgroup_exit,
+	.subsys_id	= domain_cgroup_subsys_id,
+	.base_cftypes	= domain_files,
+	.early_init	= 1,
+};
+
 #endif	/* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_CPUACCT
-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ