[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170529210302.26868-7-nicolas.pitre@linaro.org>
Date: Mon, 29 May 2017 17:03:01 -0400
From: Nicolas Pitre <nicolas.pitre@...aro.org>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>
Cc: linux-kernel@...r.kernel.org
Subject: [PATCH 6/7] sched/rt: make it configurable
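On most small systems where user space is tightly controlled, the realtime
scheduling class can often be dispensed with to reduce the kernel footprint.
Let's make it configurable by adding a CONFIG_SCHED_RT option (default y,
prompt only visible with EXPERT).
With SCHED_RT disabled, kernel threads requesting SCHED_FIFO or SCHED_RR
are given SCHED_NORMAL at the minimum nice value instead of getting an error.
SCHED_DL, RT_GROUP_SCHED, RCU_BOOST and FUTEX now depend on SCHED_RT.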
Signed-off-by: Nicolas Pitre <nico@...aro.org>
---
include/linux/init_task.h | 15 +++++++++++----
include/linux/sched.h | 2 ++
include/linux/sched/rt.h | 4 ++--
init/Kconfig | 14 ++++++++++++--
kernel/sched/Makefile | 4 ++--
kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++---
kernel/sched/debug.c | 2 ++
kernel/sched/sched.h | 7 +++++--
kernel/sched/stop_task.c | 4 +++-
kernel/sysctl.c | 4 +++-
kernel/time/posix-cpu-timers.c | 6 +++++-
11 files changed, 86 insertions(+), 18 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e049526bc1..6befc0aa61 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -225,6 +225,16 @@ extern struct cred init_cred;
#define INIT_TASK_SECURITY
#endif
+#ifdef CONFIG_SCHED_RT
+#define INIT_TASK_RT(tsk) \
+ .rt = { \
+ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
+ .time_slice = RR_TIMESLICE, \
+ },
+#else
+#define INIT_TASK_RT(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -250,10 +260,7 @@ extern struct cred init_cred;
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
- .rt = { \
- .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
- .time_slice = RR_TIMESLICE, \
- }, \
+ INIT_TASK_RT(tsk) \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ba0c203669..71a43480ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -518,7 +518,9 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
+#ifdef CONFIG_SCHED_RT
struct sched_rt_entity rt;
+#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index f93329aba3..f2d636582d 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -7,7 +7,7 @@ struct task_struct;
static inline int rt_prio(int prio)
{
- if (unlikely(prio < MAX_RT_PRIO))
+ if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}
@@ -17,7 +17,7 @@ static inline int rt_task(struct task_struct *p)
return rt_prio(p->prio);
}
-#ifdef CONFIG_RT_MUTEXES
+#if defined(CONFIG_RT_MUTEXES) && defined(CONFIG_SCHED_RT)
/*
* Must hold either p->pi_lock or task_rq(p)->lock.
*/
diff --git a/init/Kconfig b/init/Kconfig
index f73e3f0940..3bcd49f576 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -687,7 +687,7 @@ config TREE_RCU_TRACE
config RCU_BOOST
bool "Enable RCU priority boosting"
- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
+ depends on SCHED_RT && RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
default n
help
This option boosts the priority of preempted RCU readers that
@@ -1090,7 +1090,7 @@ config CFS_BANDWIDTH
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
- depends on CGROUP_SCHED
+ depends on CGROUP_SCHED && SCHED_RT
default n
help
This feature lets you explicitly allocate real CPU bandwidth
@@ -1303,8 +1303,17 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
+config SCHED_RT
+ bool "Real Time Task Scheduling" if EXPERT
+ default y
+ help
+ This adds the sched_rt scheduling class to the kernel providing
+ support for the SCHED_FIFO and SCHED_RR policies. You might want
+ to disable this to reduce the kernel size. If unsure say y.
+
config SCHED_DL
bool "Deadline Task Scheduling" if EXPERT
+ depends on SCHED_RT
default y
help
This adds the sched_dl scheduling class to the kernel providing
@@ -1632,6 +1641,7 @@ config BASE_FULL
config FUTEX
bool "Enable futex support" if EXPERT
default y
+ depends on SCHED_RT
select RT_MUTEXES
help
Disabling this option will cause the kernel to be built without
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 3bd6a7c1cc..bccbef85e5 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,8 +16,8 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += wait.o swait.o completion.o idle.o
-obj-y += idle_task.o fair.o rt.o
+obj-y += wait.o swait.o completion.o idle.o idle_task.o fair.o
+obj-$(CONFIG_SCHED_RT) += rt.o
obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a7b004e440..3dd6fce750 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -640,6 +640,7 @@ bool sched_can_stop_tick(struct rq *rq)
return false;
#endif
+#ifdef CONFIG_SCHED_RT
/*
* If there are more than one RR tasks, we need the tick to effect the
* actual RR behaviour.
@@ -658,6 +659,7 @@ bool sched_can_stop_tick(struct rq *rq)
fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
if (fifo_nr_running)
return true;
+#endif
/*
* If there are no DL,RR/FIFO tasks, there must only be CFS tasks left;
@@ -1586,7 +1588,7 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
* Reset it back to a normal scheduling class so that
* it can die in pieces.
*/
- old_stop->sched_class = &rt_sched_class;
+ old_stop->sched_class = stop_sched_class.next;
}
}
@@ -2182,11 +2184,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
__dl_clear_params(p);
#endif
+#ifdef CONFIG_SCHED_RT
INIT_LIST_HEAD(&p->rt.run_list);
p->rt.timeout = 0;
p->rt.time_slice = sched_rr_timeslice;
p->rt.on_rq = 0;
p->rt.on_list = 0;
+#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -3716,13 +3720,18 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
p->sched_class = &dl_sched_class;
} else
#endif
+#ifdef CONFIG_SCHED_RT
if (rt_prio(prio)) {
if (oldprio < prio)
queue_flag |= ENQUEUE_HEAD;
p->sched_class = &rt_sched_class;
- } else {
+ } else
+#endif
+ {
+#ifdef CONFIG_SCHED_RT
if (rt_prio(oldprio))
p->rt.timeout = 0;
+#endif
p->sched_class = &fair_sched_class;
}
@@ -3997,6 +4006,23 @@ static int __sched_setscheduler(struct task_struct *p,
/* May grab non-irq protected spin_locks: */
BUG_ON(in_interrupt());
+
+ /*
+ * When the RT scheduling class is disabled, make sure kernel threads
+ * requesting an RT policy still get the lowest nice value, and thus the
+ * highest available priority, rather than simply getting an error. We can't
+ * test rt_policy() here because it always returns false in that case.
+ */
+ if (!IS_ENABLED(CONFIG_SCHED_RT) && !user &&
+ (policy == SCHED_FIFO || policy == SCHED_RR)) {
+ static const struct sched_attr k_attr = {
+ .sched_policy = SCHED_NORMAL,
+ .sched_nice = MIN_NICE,
+ };
+ attr = &k_attr;
+ policy = SCHED_NORMAL;
+ }
+
recheck:
/* Double check policy once rq lock held: */
if (policy < 0) {
@@ -5726,7 +5752,9 @@ void __init sched_init_smp(void)
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);
+#ifdef CONFIG_SCHED_RT
init_sched_rt_class();
+#endif
#ifdef CONFIG_SCHED_DL
init_sched_dl_class();
#endif
@@ -5832,7 +5860,9 @@ void __init sched_init(void)
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
+#ifdef CONFIG_SCHED_RT
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
+#endif
#ifdef CONFIG_SCHED_DL
init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
#endif
@@ -5864,7 +5894,10 @@ void __init sched_init(void)
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs);
+#ifdef CONFIG_SCHED_RT
init_rt_rq(&rq->rt);
+ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
+#endif
#ifdef CONFIG_SCHED_DL
init_dl_rq(&rq->dl);
#endif
@@ -5895,7 +5928,6 @@ void __init sched_init(void)
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
- rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
@@ -6132,7 +6164,9 @@ static DEFINE_SPINLOCK(task_group_lock);
static void sched_free_group(struct task_group *tg)
{
free_fair_sched_group(tg);
+#ifdef CONFIG_SCHED_RT
free_rt_sched_group(tg);
+#endif
autogroup_free(tg);
kmem_cache_free(task_group_cache, tg);
}
@@ -6149,8 +6183,10 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_fair_sched_group(tg, parent))
goto err;
+#ifdef CONFIG_SCHED_RT
if (!alloc_rt_sched_group(tg, parent))
goto err;
+#endif
return tg;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 84f80a81ab..c550723ce9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -645,7 +645,9 @@ do { \
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
+#ifdef CONFIG_SCHED_RT
print_rt_stats(m, cpu);
+#endif
#ifdef CONFIG_SCHED_DL
print_dl_stats(m, cpu);
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 41dc10b707..38439eefd3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -132,7 +132,8 @@ static inline int fair_policy(int policy)
static inline int rt_policy(int policy)
{
- return policy == SCHED_FIFO || policy == SCHED_RR;
+ return IS_ENABLED(CONFIG_SCHED_RT) &&
+ (policy == SCHED_FIFO || policy == SCHED_RR);
}
static inline int dl_policy(int policy)
@@ -1447,8 +1448,10 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
#define sched_class_highest (&stop_sched_class)
#elif defined(CONFIG_SCHED_DL)
#define sched_class_highest (&dl_sched_class)
-#else
+#elif defined(CONFIG_SCHED_RT)
#define sched_class_highest (&rt_sched_class)
+#else
+#define sched_class_highest (&fair_sched_class)
#endif
#define for_each_class(class) \
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 5632dc3e63..7cad8c1540 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -112,8 +112,10 @@ static void update_curr_stop(struct rq *rq)
const struct sched_class stop_sched_class = {
#ifdef CONFIG_SCHED_DL
.next = &dl_sched_class,
-#else
+#elif defined(CONFIG_SCHED_RT)
.next = &rt_sched_class,
+#else
+ .next = &fair_sched_class,
#endif
.enqueue_task = enqueue_task_stop,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a76c..1c670f4053 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -401,6 +401,7 @@ static struct ctl_table kern_table[] = {
},
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
+#ifdef CONFIG_SCHED_RT
{
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
@@ -422,6 +423,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
+#endif
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
@@ -1071,7 +1073,7 @@ static struct ctl_table kern_table[] = {
.extra1 = &neg_one,
},
#endif
-#ifdef CONFIG_RT_MUTEXES
+#if defined(CONFIG_RT_MUTEXES) && defined(CONFIG_SCHED_RT)
{
.procname = "max_lock_depth",
.data = &max_lock_depth,
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index d2a1e6dd02..010efb0e91 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -790,10 +790,12 @@ static void check_thread_timers(struct task_struct *tsk,
struct list_head *firing)
{
struct list_head *timers = tsk->cpu_timers;
- struct signal_struct *const sig = tsk->signal;
struct task_cputime *tsk_expires = &tsk->cputime_expires;
u64 expires;
+#ifdef CONFIG_SCHED_RT
+ struct signal_struct *const sig = tsk->signal;
unsigned long soft;
+#endif
/*
* If cputime_expires is zero, then there are no active
@@ -811,6 +813,7 @@ static void check_thread_timers(struct task_struct *tsk,
tsk_expires->sched_exp = check_timers_list(++timers, firing,
tsk->se.sum_exec_runtime);
+#ifdef CONFIG_SCHED_RT
/*
* Check for the special case thread timers.
*/
@@ -847,6 +850,7 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
+#endif
if (task_cputime_zero(tsk_expires))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
--
2.9.4
Powered by blists - more mailing lists