Date:   Thu, 27 Oct 2016 18:41:06 +0100
From:   Patrick Bellasi <patrick.bellasi@....com>
To:     linux-kernel@...r.kernel.org
Cc:     Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Steve Muckle <steve.muckle@...aro.org>,
        Leo Yan <leo.yan@...aro.org>,
        Viresh Kumar <viresh.kumar@...aro.org>,
        "Rafael J . Wysocki" <rjw@...ysocki.net>,
        Todd Kjos <tkjos@...gle.com>,
        Srinath Sridharan <srinathsr@...gle.com>,
        Andres Oportus <andresoportus@...gle.com>,
        Juri Lelli <juri.lelli@....com>,
        Morten Rasmussen <morten.rasmussen@....com>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Chris Redpath <chris.redpath@....com>,
        Robin Randhawa <robin.randhawa@....com>,
        Patrick Bellasi <patrick.bellasi@....com>
Subject: [RFC v2 6/8] sched/tune: compute and keep track of per CPU boost value

When per-task boosting is enabled, multiple RUNNABLE tasks can be
concurrently scheduled on the same CPU, each with a different boost
value.
For example, we could have a scenario like this:

  Task   SchedTune CGroup   Boost Value
    T1               root            0
    T2       low-priority           10
    T3        interactive           90

In these conditions we expect a CPU to be configured according to a
proper "aggregation" of the boost values required by all the tasks
currently RUNNABLE on that CPU.

A simple aggregation function is one which tracks the MAX boost value
across all the tasks RUNNABLE on a CPU. This approach always satisfies
the most boost-demanding task while at the same time:
 a) boosting all of its co-scheduled tasks, thus reducing potential
    side-effects on the most boost-demanding tasks.
 b) reducing the number of frequency switches requested by schedutil,
    thus being more friendly to architectures with slow frequency
    switching times.
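
For the three tasks in the example above, the aggregated CPU boost
would evolve with the set of RUNNABLE tasks as follows:

  RUNNABLE tasks     Aggregated CPU boost
  {T1, T2, T3}       max(0, 10, 90) = 90
  {T1, T2}           max(0, 10)     = 10
  {T1}               max(0)         =  0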

Every time a task enters or exits the RQ of a CPU, the max boost value
should be updated considering all the boost groups currently "affecting"
that CPU, i.e. those which have at least one RUNNABLE task currently
enqueued on that CPU's RQ.
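
The per boost group accounting of RUNNABLE tasks which drives these
updates is not part of this patch (presumably it is wired into the
enqueue/dequeue paths later in this series). A minimal sketch of that
accounting, with schedtune_tasks_update() as a hypothetical helper,
could be:

  /*
   * Hypothetical sketch, not in this patch: update the RUNNABLE tasks
   * count of boost group idx on this CPU and refresh the aggregated
   * max boost. delta is +1 on enqueue and -1 on dequeue.
   */
  static void
  schedtune_tasks_update(int cpu, int idx, int delta)
  {
          struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);

          bg->group[idx].tasks += delta;

          /* Conservatively re-evaluate the per CPU max boost */
          schedtune_cpu_update(cpu);
  }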

This patch introduces the required support to keep track of the boost
groups currently affecting a CPU. Thanks to the limited number of boost
groups, a small and memory-efficient per-CPU array of boost group
values (cpu_boost_groups) is updated by schedtune_boostgroup_update(),
but only when a schedtune CGroup boost value is changed. This is
expected to be an infrequent operation, perhaps done just once at
system boot, or whenever user-space needs to tune the boost value for a
specific group of tasks (e.g. touch boost behavior on Android
systems).
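
For reference, struct boost_groups is defined by an earlier patch in
this series; from the way it is used below, it can be assumed to look
roughly like this (a sketch: the field comments and the
BOOSTGROUPS_COUNT bound backing boostgroups_max are assumptions, not
taken from this patch):

  /* Assumed layout (not part of this patch), one instance per CPU */
  struct boost_groups {
          /* Maximum boost value for all RUNNABLE tasks on a CPU */
          unsigned int boost_max;
          struct {
                  /* Boost value for tasks in this boost group */
                  unsigned int boost;
                  /* Count of RUNNABLE tasks in this boost group */
                  unsigned int tasks;
          } group[BOOSTGROUPS_COUNT];
  };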

Cc: Ingo Molnar <mingo@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@....com>
---
 kernel/sched/fair.c |  2 +-
 kernel/sched/tune.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/tune.h | 14 ++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26c3911..313a815 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5581,7 +5581,7 @@ schedtune_margin(unsigned long signal, unsigned int boost)
 static inline unsigned long
 schedtune_cpu_margin(unsigned long util, int cpu)
 {
-	unsigned int boost = get_sysctl_sched_cfs_boost();
+	unsigned int boost = schedtune_cpu_boost(cpu);
 
 	if (boost == 0)
 		return 0UL;
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 4eaea1d..6a51a4d 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -104,6 +104,73 @@ struct boost_groups {
 /* Boost groups affecting each CPU in the system */
 DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
 
+static void
+schedtune_cpu_update(int cpu)
+{
+	struct boost_groups *bg;
+	unsigned int boost_max;
+	int idx;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+
+	/* The root boost group is always active */
+	boost_max = bg->group[0].boost;
+	for (idx = 1; idx < boostgroups_max; ++idx) {
+		/*
+		 * A boost group affects a CPU only if it has
+		 * RUNNABLE tasks on that CPU
+		 */
+		if (bg->group[idx].tasks == 0)
+			continue;
+		boost_max = max(boost_max, bg->group[idx].boost);
+	}
+
+	bg->boost_max = boost_max;
+}
+
+static void
+schedtune_boostgroup_update(int idx, int boost)
+{
+	struct boost_groups *bg;
+	int cur_boost_max;
+	int old_boost;
+	int cpu;
+
+	/* Update per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+
+		/*
+		 * Keep track of the current boost values so that the per CPU
+		 * maximum is recomputed only when it is affected by the new
+		 * value of the updated boost group
+		 */
+		cur_boost_max = bg->boost_max;
+		old_boost = bg->group[idx].boost;
+
+		/* Update the boost value of this boost group */
+		bg->group[idx].boost = boost;
+
+		/* Check if this update increases the current max */
+		if (boost > cur_boost_max && bg->group[idx].tasks) {
+			bg->boost_max = boost;
+			continue;
+		}
+
+		/* Check if this update has decreased the current max */
+		if (cur_boost_max == old_boost && old_boost > boost)
+			schedtune_cpu_update(cpu);
+	}
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+	struct boost_groups *bg;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+	return bg->boost_max;
+}
+
 static u64
 boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -123,6 +190,9 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	st->boost = boost;
 	if (css == &root_schedtune.css)
 		sysctl_sched_cfs_boost = boost;
+	/* Update CPU boost */
+	schedtune_boostgroup_update(st->idx, st->boost);
+
 	return 0;
 }
 
@@ -199,6 +269,9 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 static void
 schedtune_boostgroup_release(struct schedtune *st)
 {
+	/* Reset this boost group */
+	schedtune_boostgroup_update(st->idx, 0);
+
 	/* Keep track of allocated boost groups */
 	allocated_group[st->idx] = NULL;
 }
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
index 515d02a..e936b91 100644
--- a/kernel/sched/tune.h
+++ b/kernel/sched/tune.h
@@ -10,4 +10,18 @@
 
 extern struct reciprocal_value schedtune_spc_rdiv;
 
+#ifdef CONFIG_CGROUP_SCHED_TUNE
+
+int schedtune_cpu_boost(int cpu);
+
+#else /* CONFIG_CGROUP_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  get_sysctl_sched_cfs_boost()
+
+#endif /* CONFIG_CGROUP_SCHED_TUNE */
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  0
+
 #endif /* CONFIG_SCHED_TUNE */
-- 
2.10.1
