[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250724161625.2360309-4-prakash.sangappa@oracle.com>
Date: Thu, 24 Jul 2025 16:16:17 +0000
From: Prakash Sangappa <prakash.sangappa@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: peterz@...radead.org, rostedt@...dmis.org, mathieu.desnoyers@...icios.com,
tglx@...utronix.de, bigeasy@...utronix.de, kprateek.nayak@....com,
vineethr@...ux.ibm.com, prakash.sangappa@...cle.com
Subject: [PATCH V7 03/11] sched: Tunable to specify duration of time slice extension
Add a tunable to specify the duration of the scheduler time slice extension.
The default is 30us and the maximum value that can be specified
is 100us. Setting it to 0 disables the scheduler time slice extension.
Signed-off-by: Prakash Sangappa <prakash.sangappa@...cle.com>
---
.../admin-guide/kernel-parameters.txt | 8 ++++
Documentation/admin-guide/sysctl/kernel.rst | 8 ++++
include/linux/sched.h | 5 +++
include/uapi/linux/rseq.h | 5 ++-
kernel/rseq.c | 8 +++-
kernel/sched/core.c | 40 +++++++++++++++++++
6 files changed, 70 insertions(+), 4 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0ee6c5314637..1e0f86cda0db 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6398,6 +6398,14 @@
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
+ sched_preempt_delay_us= [KNL]
+ Scheduler preemption delay in microseconds.
+ Allowed range is 0 to 100us, the default is 30us.
+ A thread can request an extension of its scheduler
+ time slice on the CPU by delaying preemption. The
+ duration of the preemption delay granted is specified
+ by this parameter. Setting it to 0 disables this
+ feature.
+
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
incurs a small amount of overhead in the scheduler
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index dd49a89a62d3..f446347215c3 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1232,6 +1232,14 @@ Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
a small amount of overhead in the scheduler but is useful for debugging
and performance tuning. It is required by some tools such as iotop.
+sched_preempt_delay_us
+======================
+
+Scheduler preemption delay in microseconds. Allowed range is 0 to 100us;
+the default is 30us. A thread can request an extension of its scheduler
+time slice on the CPU by delaying preemption. The duration of the
+preemption delay granted is specified by this parameter. Setting it to 0
+disables this feature.
+
sched_schedstats
================
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5df055f2dd9e..5ba3e33f6252 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -406,6 +406,11 @@ static inline void sched_domains_mutex_lock(void) { }
static inline void sched_domains_mutex_unlock(void) { }
#endif
+#ifdef CONFIG_RSEQ_RESCHED_DELAY
+/*
+ * Scheduler time slice extension duration, in microseconds.
+ * A value of 0 disables the time slice extension.
+ */
+extern unsigned int sysctl_sched_preempt_delay_us;
+#endif
+
struct sched_param {
int sched_priority;
};
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index f4813d931387..015534f064af 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -137,8 +137,9 @@ struct rseq {
* this thread.
* - RSEQ_CS_FLAG_DELAY_RESCHED
* Request by user thread to delay preemption. With use
- * of a timer, kernel grants extra cpu time upto 30us for this
- * thread before being rescheduled.
+ * of a timer, kernel grants extra cpu time up to the tunable
+ * 'sched_preempt_delay_us' value for this thread before it gets
+ * rescheduled.
* - RSEQ_CS_FLAG_RESCHEDULED
* Set by kernel if the thread was rescheduled in the extra time
* granted due to request RSEQ_CS_DELAY_RESCHED. This bit is
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 6331b653b402..3107bbc9b77c 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -454,6 +454,9 @@ bool __rseq_delay_resched(void)
struct task_struct *t = current;
u32 flags;
+ if (!sysctl_sched_preempt_delay_us)
+ return false;
+
if (copy_from_user_nofault(&flags, &t->rseq->flags, sizeof(flags)))
return false;
@@ -471,8 +474,9 @@ bool __rseq_delay_resched(void)
void rseq_delay_resched_arm_timer(void)
{
- if (unlikely(current->rseq_delay_resched == RSEQ_RESCHED_DELAY_REQUESTED))
- hrtick_local_start(30 * NSEC_PER_USEC);
+ if (unlikely(sysctl_sched_preempt_delay_us &&
+ current->rseq_delay_resched == RSEQ_RESCHED_DELAY_REQUESTED))
+ hrtick_local_start(sysctl_sched_preempt_delay_us * NSEC_PER_USEC);
}
void rseq_delay_resched_tick(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ba1e4f6981cd..03834ac426d0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -149,6 +149,16 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
*/
__read_mostly unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
+#ifdef CONFIG_RSEQ_RESCHED_DELAY
+/*
+ * Scheduler time slice extension, duration in microseconds.
+ * Max value allowed is 100us, default is 30us.
+ * If set to 0, scheduler time slice extension is disabled.
+ */
+#define SCHED_PREEMPT_DELAY_DEFAULT_US	30
+#define SCHED_PREEMPT_DELAY_MAX_US	100
+__read_mostly unsigned int sysctl_sched_preempt_delay_us = SCHED_PREEMPT_DELAY_DEFAULT_US;
+
+/*
+ * Parse the "sched_preempt_delay_us=" boot parameter documented in
+ * Documentation/admin-guide/kernel-parameters.txt. Values that do not parse
+ * or that exceed the allowed maximum are rejected with a warning and the
+ * current setting is left untouched.
+ */
+static int __init setup_sched_preempt_delay_us(char *str)
+{
+	unsigned int val;
+
+	if (kstrtouint(str, 0, &val) || val > SCHED_PREEMPT_DELAY_MAX_US) {
+		pr_warn("Invalid sched_preempt_delay_us=%s, allowed range is 0 to %d us\n",
+			str, SCHED_PREEMPT_DELAY_MAX_US);
+		return 1;
+	}
+
+	sysctl_sched_preempt_delay_us = val;
+	return 1;
+}
+__setup("sched_preempt_delay_us=", setup_sched_preempt_delay_us);
+#endif
+
__read_mostly int scheduler_running;
#ifdef CONFIG_SCHED_CORE
@@ -4678,6 +4688,25 @@ static int sysctl_schedstats(const struct ctl_table *table, int write, void *buf
#endif /* CONFIG_PROC_SYSCTL */
#endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_PROC_SYSCTL
+#ifdef CONFIG_RSEQ_RESCHED_DELAY
+/*
+ * Handler for the sched_preempt_delay_us sysctl.
+ *
+ * The value is range checked against the table entry's extra1/extra2
+ * bounds. Writing a value above the default is permitted, but a warning
+ * is logged.
+ *
+ * Returns 0 on success or a negative errno from the proc helper.
+ */
+static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	/* The backing variable is an unsigned int, so use the unsigned helper. */
+	err = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+
+	/* Warn on writes only, so that reading the sysctl does not spam the log. */
+	if (write && sysctl_sched_preempt_delay_us > SCHED_PREEMPT_DELAY_DEFAULT_US)
+		pr_warn("Sched preemption delay set to %u us is higher than the default value %d us\n",
+			sysctl_sched_preempt_delay_us, SCHED_PREEMPT_DELAY_DEFAULT_US);
+
+	return err;
+}
+#endif /* CONFIG_RSEQ_RESCHED_DELAY */
+#endif /* CONFIG_PROC_SYSCTL */
+
#ifdef CONFIG_SYSCTL
static const struct ctl_table sched_core_sysctls[] = {
#ifdef CONFIG_SCHEDSTATS
@@ -4725,6 +4754,17 @@ static const struct ctl_table sched_core_sysctls[] = {
.extra2 = SYSCTL_FOUR,
},
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_RSEQ_RESCHED_DELAY
+ {
+ .procname = "sched_preempt_delay_us",
+ .data = &sysctl_sched_preempt_delay_us,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_preempt_delay,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+#endif /* CONFIG_RSEQ_RESCHED_DELAY */
};
static int __init sched_core_sysctl_init(void)
{
--
2.43.5
Powered by blists - more mailing lists