lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210912041222.59480-1-yang.yang29@zte.com.cn>
Date:   Sun, 12 Sep 2021 04:12:23 +0000
From:   cgel.zte@...il.com
To:     peterz@...radead.org, yzaikin@...gle.com, liu.hailong6@....com.cn
Cc:     mingo@...hat.com, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, mcgrof@...nel.org, keescook@...omium.org,
        pjt@...gle.com, yang.yang29@....com.cn, joshdon@...gle.com,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        Zeal Robot <zealci@....com.cm>
Subject: [PATCH] sched: Add a new version sysctl to control child runs first

From: Yang Yang <yang.yang29@....com.cn>

The old version of this sysctl has some problems. First, it allows
setting a value bigger than 1, which is unnecessary. Second, it does
not follow the capability rules: writes are not gated on CAP_SYS_ADMIN.
Third, it does not use a static key. This new version fixes all of
these problems.

Signed-off-by: Yang Yang <yang.yang29@....com.cn>
Reported-by: Zeal Robot <zealci@....com.cm>
---
 include/linux/sched/sysctl.h |  2 ++
 kernel/sched/core.c          | 35 +++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c          |  3 ++-
 kernel/sched/sched.h         |  1 +
 kernel/sysctl.c              |  6 ++++--
 5 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 304f431178fd..0a194d0cf692 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -74,6 +74,8 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
+int sysctl_child_runs_first(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 extern unsigned int sysctl_sched_energy_aware;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c4462c454ab9..bfea7ecf3b83 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4323,6 +4323,41 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
 #endif /* CONFIG_PROC_SYSCTL */
 #endif /* CONFIG_SCHEDSTATS */
 
+DEFINE_STATIC_KEY_FALSE(child_runs_first);
+
+static void set_child_runs_first(bool enabled)
+{
+	if (enabled) {
+		static_branch_enable(&child_runs_first);
+		sysctl_sched_child_runs_first = 1;
+	} else {
+		static_branch_disable(&child_runs_first);
+		sysctl_sched_child_runs_first = 0;
+	}
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_child_runs_first(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_likely(&child_runs_first);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_child_runs_first(state);
+	return err;
+}
+#endif /* CONFIG_PROC_SYSCTL */
+
 /*
  * fork()/clone()-time setup:
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ff69f245b939..f6d4307bd654 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11099,7 +11099,8 @@ static void task_fork_fair(struct task_struct *p)
 	}
 	place_entity(cfs_rq, se, 1);
 
-	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
+	if (static_branch_unlikely(&child_runs_first) &&
+	    curr && entity_before(curr, se)) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3d3e5793e117..89ac11e48173 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2002,6 +2002,7 @@ static const_debug __maybe_unused unsigned int sysctl_sched_features =
 
 extern struct static_key_false sched_numa_balancing;
 extern struct static_key_false sched_schedstats;
+DECLARE_STATIC_KEY_FALSE(child_runs_first);
 
 static inline u64 global_rt_period(void)
 {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 083be6af29d7..72063cffc565 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1773,10 +1773,12 @@ int proc_do_static_key(struct ctl_table *table, int write,
 static struct ctl_table kern_table[] = {
 	{
 		.procname	= "sched_child_runs_first",
-		.data		= &sysctl_sched_child_runs_first,
+		.data		= NULL,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= sysctl_child_runs_first,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 #ifdef CONFIG_SCHEDSTATS
 	{
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ