[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241017052000.99200-3-cpru@amazon.com>
Date: Thu, 17 Oct 2024 00:20:00 -0500
From: Cristian Prundeanu <cpru@...zon.com>
To: <linux-tip-commits@...r.kernel.org>
CC: <linux-kernel@...r.kernel.org>, Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>, <x86@...nel.org>,
<linux-arm-kernel@...ts.infradead.org>, Bjoern Doebel <doebel@...zon.com>,
Hazem Mohamed Abuelfotoh <abuehaze@...zon.com>, Geoff Blake
<blakgeof@...zon.com>, Ali Saidi <alisaidi@...zon.com>, Csaba Csoma
<csabac@...zon.com>, Cristian Prundeanu <cpru@...zon.com>,
<stable@...r.kernel.org>
Subject: [PATCH 2/2] [tip: sched/core] sched: Move PLACE_LAG and RUN_TO_PARITY to sysctl
These two scheduler features have a high impact on performance for some
database workloads. Move them to sysctl as they are likely to be modified
and persisted across reboots.
Cc: <stable@...r.kernel.org> # 6.6.x
Fixes: 86bfbb7ce4f6 ("sched/fair: Add lag based placement")
Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption")
Signed-off-by: Cristian Prundeanu <cpru@...zon.com>
---
include/linux/sched/sysctl.h | 8 ++++++++
kernel/sched/core.c | 13 +++++++++++++
kernel/sched/fair.c | 5 +++--
kernel/sched/features.h | 10 ----------
kernel/sysctl.c | 20 ++++++++++++++++++++
5 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5a64582b086b..0258fba3896a 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -29,4 +29,12 @@ extern int sysctl_numa_balancing_mode;
#define sysctl_numa_balancing_mode 0
#endif
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+extern unsigned int sysctl_sched_place_lag_enabled;
+extern unsigned int sysctl_sched_run_to_parity_enabled;
+#else
+#define sysctl_sched_place_lag_enabled 0
+#define sysctl_sched_run_to_parity_enabled 0
+#endif
+
#endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43e453ab7e20..c6bd1bda8c7e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -134,6 +134,19 @@ const_debug unsigned int sysctl_sched_features =
0;
#undef SCHED_FEAT
+#ifdef CONFIG_SYSCTL
+/*
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
+ */
+__read_mostly unsigned int sysctl_sched_place_lag_enabled = 0;
+/*
+ * Inhibit (wakeup) preemption until the current task has either matched the
+ * 0-lag point or until it has exhausted its slice.
+ */
+__read_mostly unsigned int sysctl_sched_run_to_parity_enabled = 0;
+#endif
+
/*
* Print a warning if need_resched is set for the given duration (if
* LATENCY_WARN is enabled).
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c1..c58b76233f59 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -925,7 +925,8 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
- if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+ if (sysctl_sched_run_to_parity_enabled &&
+ curr && curr->vlag == curr->deadline)
return curr;
/* Pick the leftmost entity if it's eligible */
@@ -5280,7 +5281,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* EEVDF: placement strategy #1 / #2
*/
- if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
+ if (sysctl_sched_place_lag_enabled && cfs_rq->nr_running && se->vlag) {
struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 8a5ca80665b3..b39a9dde0b54 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,10 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Using the avg_vruntime, do the right thing and preserve lag across
- * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
- */
-SCHED_FEAT(PLACE_LAG, false)
/*
* Give new tasks half a slice to ease into the competition.
*/
@@ -13,11 +8,6 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
* Preserve relative virtual deadline on 'migration'.
*/
SCHED_FEAT(PLACE_REL_DEADLINE, true)
-/*
- * Inhibit (wakeup) preemption until the current task has either matched the
- * 0-lag point or until is has exhausted it's slice.
- */
-SCHED_FEAT(RUN_TO_PARITY, false)
/*
* Allow wakeup of tasks with a shorter slice to cancel RUN_TO_PARITY for
* current.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79e6cb1d5c48..f435b741654a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2029,6 +2029,26 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_place_lag_enabled",
+ .data = &sysctl_sched_place_lag_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "sched_run_to_parity_enabled",
+ .data = &sysctl_sched_run_to_parity_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
};
static struct ctl_table vm_table[] = {
--
2.40.1
Powered by blists - more mailing lists