Message-ID: <20250701003749.50525-8-prakash.sangappa@oracle.com>
Date: Tue, 1 Jul 2025 00:37:49 +0000
From: Prakash Sangappa <prakash.sangappa@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: peterz@...radead.org, rostedt@...dmis.org, mathieu.desnoyers@...icios.com,
tglx@...utronix.de, bigeasy@...utronix.de, kprateek.nayak@....com,
vineethr@...ux.ibm.com
Subject: [PATCH V6 7/7] Introduce a config option for scheduler time slice extension feature

Add a config option, CONFIG_SCHED_PREEMPT_DELAY, to enable the
scheduler time slice extension feature. It depends on SMP && RSEQ and
replaces the bare CONFIG_RSEQ guards that earlier patches in this
series placed around the feature code.

Signed-off-by: Prakash Sangappa <prakash.sangappa@...cle.com>
---
 include/linux/entry-common.h |  2 +-
 include/linux/sched.h        |  8 ++++----
 init/Kconfig                 |  7 +++++++
 kernel/rseq.c                |  5 ++++-
 kernel/sched/core.c          | 12 ++++++------
 kernel/sched/debug.c         |  2 +-
 kernel/sched/syscalls.c      |  3 ++-
 7 files changed, 25 insertions(+), 14 deletions(-)
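
For reviewers who want to poke at the feature, a minimal userspace
sketch probing for it via the RSEQ_FLAG_QUERY_CS_FLAGS query (see the
kernel/rseq.c hunk below). The flag's numeric value here is a
placeholder, and the assumption that the supported mask is written
back through the pointer argument is exactly that, an assumption; the
uapi header from this series is authoritative once it lands.

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Placeholder value; use the <linux/rseq.h> uapi header from this series. */
#ifndef RSEQ_FLAG_QUERY_CS_FLAGS
#define RSEQ_FLAG_QUERY_CS_FLAGS (1U << 2)
#endif

int main(void)
{
        unsigned int cs_flags = 0;

        /*
         * Per the rseq() hunk below, the query fails with EINVAL when
         * CONFIG_SCHED_PREEMPT_DELAY or CONFIG_SCHED_HRTICK is off.
         * Assumed: on success the kernel writes the supported mask
         * (RSEQ_CS_FLAG_DELAY_RESCHED | RSEQ_CS_FLAG_RESCHEDULED) back
         * through the pointer argument.
         */
        if (syscall(__NR_rseq, &cs_flags, sizeof(cs_flags),
                    RSEQ_FLAG_QUERY_CS_FLAGS, 0) < 0) {
                if (errno == EINVAL)
                        fprintf(stderr, "time slice extension not available\n");
                return 1;
        }
        printf("supported rseq cs flags: 0x%x\n", cs_flags);
        return 0;
}
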
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d4fa952e394e..351c9dc159bc 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -402,7 +402,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
/* reschedule if sched delay was granted */
- if (IS_ENABLED(CONFIG_RSEQ) && current->sched_time_delay)
+ if (IS_ENABLED(CONFIG_SCHED_PREEMPT_DELAY) && current->sched_time_delay)
set_tsk_need_resched(current);
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 90d7989a0185..ca2b461b7662 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -339,7 +339,7 @@ extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);
extern void hrtick_local_start(u64 delay);
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
extern void update_stat_preempt_delayed(struct task_struct *t);
#endif
@@ -409,7 +409,7 @@ static inline void sched_domains_mutex_lock(void) { }
static inline void sched_domains_mutex_unlock(void) { }
#endif
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
/* Scheduler time slice extension */
extern unsigned int sysctl_sched_preempt_delay_us;
#endif
@@ -572,7 +572,7 @@ struct sched_statistics {
u64 nr_wakeups_passive;
u64 nr_wakeups_idle;
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
u64 nr_preempt_delay_granted;
#endif
@@ -2259,7 +2259,7 @@ static inline bool owner_on_cpu(struct task_struct *owner)
unsigned long sched_cpu_util(int cpu);
#endif /* CONFIG_SMP */
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
extern bool rseq_delay_resched(void);
extern void rseq_delay_resched_fini(void);
diff --git a/init/Kconfig b/init/Kconfig
index ce76e913aa2b..2f5f603d175a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1130,6 +1130,13 @@ config SCHED_MM_CID
def_bool y
depends on SMP && RSEQ
+config SCHED_PREEMPT_DELAY
+ def_bool y
+ depends on SMP && RSEQ
+ help
+ This feature allows a thread to request an extension of its time
+ slice on the CPU by delaying preemption.
+
config UCLAMP_TASK_GROUP
bool "Utilization clamping per group of tasks"
depends on CGROUP_SCHED
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 7710a209433b..440fa4002be5 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -448,6 +448,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
force_sigsegv(sig);
}
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
bool rseq_delay_resched(void)
{
struct task_struct *t = current;
@@ -526,6 +527,7 @@ void rseq_delay_schedule(struct task_struct *tsk)
}
#endif
}
+#endif /* CONFIG_SCHED_PREEMPT_DELAY */
#ifdef CONFIG_DEBUG_RSEQ
@@ -581,7 +583,8 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (flags & RSEQ_FLAG_QUERY_CS_FLAGS) {
u32 rseq_csflags = RSEQ_CS_FLAG_DELAY_RESCHED |
RSEQ_CS_FLAG_RESCHEDULED;
- if (!IS_ENABLED(CONFIG_SCHED_HRTICK))
+ if (!IS_ENABLED(CONFIG_SCHED_PREEMPT_DELAY) ||
+ !IS_ENABLED(CONFIG_SCHED_HRTICK))
return -EINVAL;
if (!rseq)
return -EINVAL;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd572053a955..d28c0e75b4f3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -149,7 +149,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
*/
__read_mostly unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
/*
* Scheduler time slice extension, duration in microsecs.
* Max value allowed 100us, default is 30us.
@@ -941,7 +941,7 @@ void hrtick_local_start(u64 delay)
rq_unlock(rq, &rf);
}
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
void update_stat_preempt_delayed(struct task_struct *t)
{
schedstat_inc(t->stats.nr_preempt_delay_granted);
@@ -4697,7 +4697,7 @@ static int sysctl_schedstats(const struct ctl_table *table, int write, void *buf
#endif /* CONFIG_SCHEDSTATS */
#ifdef CONFIG_PROC_SYSCTL
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -4711,7 +4711,7 @@ static int sysctl_sched_preempt_delay(const struct ctl_table *table, int write,
SCHED_PREEMPT_DELAY_DEFAULT_US);
return err;
}
-#endif /* CONFIG_RSEQ */
+#endif /* CONFIG_SCHED_PREEMPT_DELAY */
#endif /* CONFIG_PROC_SYSCTL */
#ifdef CONFIG_SYSCTL
@@ -4761,7 +4761,7 @@ static const struct ctl_table sched_core_sysctls[] = {
.extra2 = SYSCTL_FOUR,
},
#endif /* CONFIG_NUMA_BALANCING */
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
{
.procname = "sched_preempt_delay_us",
.data = &sysctl_sched_preempt_delay_us,
@@ -4771,7 +4771,7 @@ static const struct ctl_table sched_core_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE_HUNDRED,
},
-#endif /* CONFIG_RSEQ */
+#endif /* CONFIG_SCHED_PREEMPT_DELAY */
};
static int __init sched_core_sysctl_init(void)
{
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 3a2efd9505e1..45ae09447624 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1225,7 +1225,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(nr_wakeups_passive);
P_SCHEDSTAT(nr_wakeups_idle);
-#ifdef CONFIG_RSEQ
+#ifdef CONFIG_SCHED_PREEMPT_DELAY
P_SCHEDSTAT(nr_preempt_delay_granted);
#endif
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index d9a4e3a2e064..f86eac7e2b43 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1379,7 +1379,8 @@ static void do_sched_yield(void)
*/
SYSCALL_DEFINE0(sched_yield)
{
- if (IS_ENABLED(CONFIG_RSEQ) && current->sched_time_delay) {
+ if (IS_ENABLED(CONFIG_SCHED_PREEMPT_DELAY) &&
+ current->sched_time_delay) {
schedule();
return 0;
}
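
A quick note on the knob itself: sched_preempt_delay_us sits in
sched_core_sysctls, which sched_core_sysctl_init() registers under
"kernel", so it should surface as
/proc/sys/kernel/sched_preempt_delay_us, clamped to 0-100 microseconds
with a 30us default. A minimal sketch for tuning it, with the path
assumed as above:

#include <stdio.h>

/* Assumed path, based on sched_core_sysctls registering under "kernel". */
#define PREEMPT_DELAY_SYSCTL "/proc/sys/kernel/sched_preempt_delay_us"

int main(void)
{
        FILE *f = fopen(PREEMPT_DELAY_SYSCTL, "w");

        if (!f) {
                perror(PREEMPT_DELAY_SYSCTL);
                return 1;
        }
        /* Valid range per the sysctl table: 0..100 microseconds. */
        fprintf(f, "50\n");
        fclose(f);
        return 0;
}
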
--
2.43.5