lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Thu, 5 Oct 2017 13:45:30 -0600
From:   Mathieu Poirier <mathieu.poirier@...aro.org>
To:     Nicolas Pitre <nicolas.pitre@...aro.org>
Cc:     Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v3] sched/deadline: make it configurable

On 3 October 2017 at 20:36, Nicolas Pitre <nicolas.pitre@...aro.org> wrote:
> On most small systems, the deadline scheduler class is a luxury that
> rarely gets used if at all. It is preferable to have the ability to
> configure it out to reduce the kernel size in that case.
>
> Before:
>
> $ size -t kernel/sched/built-in.o
>    text    data     bss     dec     hex filename
> [...]
>   24435    3452     108   27995    6d5b (TOTALS)
>
> With CONFIG_SCHED_DL=n:
>
> $ size -t kernel/sched/built-in.o
>    text    data     bss     dec     hex filename
> [...]
>   18336    3388      92   21816    5538 (TOTALS)
>

As expected deadline.o and cpudeadline.o aren't found in
kernel/sched/built-in.o once deadline scheduling has been configured
out.  You even get a nice error message when trying to spin off a DL
task on a non-DL system:

ERROR: could not set PID 4058 to E: SCHED_DEADLINE - value out of
range / policy not implemented

Compiled and tested on: ARM, ARM64 and x86_64

Tested-by: Mathieu Poirier <mathieu.poirier@...aro.org>

> Signed-off-by: Nicolas Pitre <nico@...aro.org>
> ---
>
> Changes from v2:
>
> - rebased to v4.14-rc2
>
> Changes from v1:
>
> - fix for a compilation error found by kbuild test robot
>
>
>  include/linux/sched.h          |  2 ++
>  include/linux/sched/deadline.h |  8 ++++++-
>  init/Kconfig                   |  8 +++++++
>  kernel/locking/rtmutex.c       |  6 ++---
>  kernel/sched/Makefile          |  5 ++--
>  kernel/sched/core.c            | 15 +++++++-----
>  kernel/sched/cpudeadline.h     |  7 +++++-
>  kernel/sched/debug.c           |  4 ++++
>  kernel/sched/rt.c              | 13 +++++++----
>  kernel/sched/sched.h           | 44 +++++++++++++++++++++++++++++-------
>  kernel/sched/stop_task.c       |  4 ++++
>  11 files changed, 90 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 92fb8dd5a9..00b4bed170 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -565,7 +565,9 @@ struct task_struct {
>  #ifdef CONFIG_CGROUP_SCHED
>         struct task_group               *sched_task_group;
>  #endif
> +#ifdef CONFIG_SCHED_DL
>         struct sched_dl_entity          dl;
> +#endif
>
>  #ifdef CONFIG_PREEMPT_NOTIFIERS
>         /* List of struct preempt_notifier: */
> diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
> index 975be862e0..8f191a17dd 100644
> --- a/include/linux/sched/deadline.h
> +++ b/include/linux/sched/deadline.h
> @@ -13,7 +13,7 @@
>
>  static inline int dl_prio(int prio)
>  {
> -       if (unlikely(prio < MAX_DL_PRIO))
> +       if (IS_ENABLED(CONFIG_SCHED_DL) && unlikely(prio < MAX_DL_PRIO))
>                 return 1;
>         return 0;
>  }
> @@ -28,4 +28,10 @@ static inline bool dl_time_before(u64 a, u64 b)
>         return (s64)(a - b) < 0;
>  }
>
> +#ifdef CONFIG_SCHED_DL
> +#define dl_deadline(tsk)       (tsk)->dl.deadline
> +#else
> +#define dl_deadline(tsk)       0
> +#endif
> +
>  #endif /* _LINUX_SCHED_DEADLINE_H */
> diff --git a/init/Kconfig b/init/Kconfig
> index 78cb246101..f252e0dbee 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -959,6 +959,14 @@ config SCHED_AUTOGROUP
>           desktop applications.  Task group autogeneration is currently based
>           upon task session.
>
> +config SCHED_DL
> +       bool "Deadline Task Scheduling" if EXPERT
> +       default y
> +       help
> +         This adds the sched_dl scheduling class to the kernel providing
> +         support for the SCHED_DEADLINE policy. You might want to disable
> +         this to reduce the kernel size. If unsure say y.
> +
>  config SYSFS_DEPRECATED
>         bool "Enable deprecated sysfs features to support old userspace tools"
>         depends on SYSFS
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 6f3dba6e4e..12f5eb1953 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -228,7 +228,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
>   * Only use with rt_mutex_waiter_{less,equal}()
>   */
>  #define task_to_waiter(p)      \
> -       &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
> +       &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = dl_deadline(p) }
>
>  static inline int
>  rt_mutex_waiter_less(struct rt_mutex_waiter *left,
> @@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
>          * the values of the node being removed.
>          */
>         waiter->prio = task->prio;
> -       waiter->deadline = task->dl.deadline;
> +       waiter->deadline = dl_deadline(task);
>
>         rt_mutex_enqueue(lock, waiter);
>
> @@ -954,7 +954,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
>         waiter->task = task;
>         waiter->lock = lock;
>         waiter->prio = task->prio;
> -       waiter->deadline = task->dl.deadline;
> +       waiter->deadline = dl_deadline(task);
>
>         /* Get the top priority waiter on the lock */
>         if (rt_mutex_has_waiters(lock))
> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> index 78f54932ea..0d3baba207 100644
> --- a/kernel/sched/Makefile
> +++ b/kernel/sched/Makefile
> @@ -16,9 +16,10 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
>  endif
>
>  obj-y += core.o loadavg.o clock.o cputime.o
> -obj-y += idle_task.o fair.o rt.o deadline.o
> +obj-y += idle_task.o fair.o rt.o
>  obj-y += wait.o wait_bit.o swait.o completion.o idle.o
> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
> +obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
> +obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
>  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
>  obj-$(CONFIG_SCHEDSTATS) += stats.o
>  obj-$(CONFIG_SCHED_DEBUG) += debug.o
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 18a6966567..8dc1fd7bb7 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -636,7 +636,7 @@ bool sched_can_stop_tick(struct rq *rq)
>         int fifo_nr_running;
>
>         /* Deadline tasks, even if single, need the tick */
> -       if (rq->dl.dl_nr_running)
> +       if (dl_nr_running(rq))
>                 return false;
>
>         /*
> @@ -2175,10 +2175,12 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
>         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
>  #endif
>
> +#ifdef CONFIG_SCHED_DL
>         RB_CLEAR_NODE(&p->dl.rb_node);
>         init_dl_task_timer(&p->dl);
>         init_dl_inactive_task_timer(&p->dl);
>         __dl_clear_params(p);
> +#endif
>
>         INIT_LIST_HEAD(&p->rt.run_list);
>         p->rt.timeout           = 0;
> @@ -3729,20 +3731,20 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>         if (dl_prio(prio)) {
>                 if (!dl_prio(p->normal_prio) ||
>                     (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
> -                       p->dl.dl_boosted = 1;
> +                       dl_boosted(p) = 1;
>                         queue_flag |= ENQUEUE_REPLENISH;
>                 } else
> -                       p->dl.dl_boosted = 0;
> +                       dl_boosted(p) = 0;
>                 p->sched_class = &dl_sched_class;
>         } else if (rt_prio(prio)) {
>                 if (dl_prio(oldprio))
> -                       p->dl.dl_boosted = 0;
> +                       dl_boosted(p) = 0;
>                 if (oldprio < prio)
>                         queue_flag |= ENQUEUE_HEAD;
>                 p->sched_class = &rt_sched_class;
>         } else {
>                 if (dl_prio(oldprio))
> -                       p->dl.dl_boosted = 0;
> +                       dl_boosted(p) = 0;
>                 if (rt_prio(oldprio))
>                         p->rt.timeout = 0;
>                 p->sched_class = &fair_sched_class;
> @@ -5282,7 +5284,8 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
>         if (!cpumask_weight(cur))
>                 return ret;
>
> -       ret = dl_cpuset_cpumask_can_shrink(cur, trial);
> +       if (IS_ENABLED(CONFIG_SCHED_DL))
> +               ret = dl_cpuset_cpumask_can_shrink(cur, trial);
>
>         return ret;
>  }
> diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
> index f7da8c55bb..5f4c10f837 100644
> --- a/kernel/sched/cpudeadline.h
> +++ b/kernel/sched/cpudeadline.h
> @@ -25,10 +25,15 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>                struct cpumask *later_mask);
>  void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
>  void cpudl_clear(struct cpudl *cp, int cpu);
> -int cpudl_init(struct cpudl *cp);
>  void cpudl_set_freecpu(struct cpudl *cp, int cpu);
>  void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
> +#ifdef CONFIG_SCHED_DL
> +int cpudl_init(struct cpudl *cp);
>  void cpudl_cleanup(struct cpudl *cp);
> +#else
> +#define cpudl_init(cp)         0
> +#define cpudl_cleanup(cp)      do { } while (0)
> +#endif
>  #endif /* CONFIG_SMP */
>
>  #endif /* _LINUX_CPUDL_H */
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 01217fb5a5..775fa98aec 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -706,7 +706,9 @@ do {                                                                        \
>         spin_lock_irqsave(&sched_debug_lock, flags);
>         print_cfs_stats(m, cpu);
>         print_rt_stats(m, cpu);
> +#ifdef CONFIG_SCHED_DL
>         print_dl_stats(m, cpu);
> +#endif
>
>         print_rq(m, rq, cpu);
>         spin_unlock_irqrestore(&sched_debug_lock, flags);
> @@ -1015,10 +1017,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
>  #endif
>         P(policy);
>         P(prio);
> +#ifdef CONFIG_SCHED_DL
>         if (p->policy == SCHED_DEADLINE) {
>                 P(dl.runtime);
>                 P(dl.deadline);
>         }
> +#endif
>  #undef PN_SCHEDSTAT
>  #undef PN
>  #undef __PN
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 0af5ca9e3e..49c1aa76a4 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1556,7 +1556,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>                  * to re-start task selection.
>                  */
>                 if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
> -                            rq->dl.dl_nr_running))
> +                            dl_nr_running(rq)))
>                         return RETRY_TASK;
>         }
>
> @@ -2716,16 +2716,19 @@ int sched_rt_handler(struct ctl_table *table, int write,
>                 if (ret)
>                         goto undo;
>
> -               ret = sched_dl_global_validate();
> -               if (ret)
> -                       goto undo;
> +               if (IS_ENABLED(CONFIG_SCHED_DL)) {
> +                       ret = sched_dl_global_validate();
> +                       if (ret)
> +                               goto undo;
> +               }
>
>                 ret = sched_rt_global_constraints();
>                 if (ret)
>                         goto undo;
>
>                 sched_rt_do_global();
> -               sched_dl_do_global();
> +               if (IS_ENABLED(CONFIG_SCHED_DL))
> +                       sched_dl_do_global();
>         }
>         if (0) {
>  undo:
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 14db76cd49..2f40f09aae 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -137,7 +137,7 @@ static inline int rt_policy(int policy)
>
>  static inline int dl_policy(int policy)
>  {
> -       return policy == SCHED_DEADLINE;
> +       return IS_ENABLED(CONFIG_SCHED_DL) && policy == SCHED_DEADLINE;
>  }
>  static inline bool valid_policy(int policy)
>  {
> @@ -158,11 +158,15 @@ static inline int task_has_dl_policy(struct task_struct *p)
>  /*
>   * Tells if entity @a should preempt entity @b.
>   */
> +#ifdef CONFIG_SCHED_DL
>  static inline bool
>  dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
>  {
>         return dl_time_before(a->deadline, b->deadline);
>  }
> +#else
> +#define dl_entity_preempt(a, b)        false
> +#endif
>
>  /*
>   * This is the priority-queue data structure of the RT scheduling class:
> @@ -247,7 +251,6 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
>  }
>
>  void dl_change_utilization(struct task_struct *p, u64 new_bw);
> -extern void init_dl_bw(struct dl_bw *dl_b);
>  extern int sched_dl_global_validate(void);
>  extern void sched_dl_do_global(void);
>  extern int sched_dl_overflow(struct task_struct *p, int policy,
> @@ -261,7 +264,27 @@ extern int dl_task_can_attach(struct task_struct *p,
>                               const struct cpumask *cs_cpus_allowed);
>  extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
>                                         const struct cpumask *trial);
> +extern struct dl_bandwidth def_dl_bandwidth;
> +
> +struct dl_rq;
> +
> +#ifdef CONFIG_SCHED_DL
> +#define dl_nr_running(rq)      (rq)->dl.dl_nr_running
> +#define dl_boosted(tsk)                (tsk)->dl.dl_boosted
>  extern bool dl_cpu_busy(unsigned int cpu);
> +extern void init_dl_bw(struct dl_bw *dl_b);
> +extern void init_sched_dl_class(void);
> +extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
> +extern void init_dl_rq(struct dl_rq *dl_rq);
> +#else
> +#define dl_nr_running(rq)      0
> +#define dl_boosted(tsk)                (*(int *)0)
> +#define dl_cpu_busy(cpu)       false
> +#define init_dl_bw(dl_b)       do { } while (0)
> +#define init_sched_dl_class()  do { } while (0)
> +#define init_dl_bandwidth(...) do { } while (0)
> +#define init_dl_rq(dl_rq)      do { } while (0)
> +#endif
>
>  #ifdef CONFIG_CGROUP_SCHED
>
> @@ -694,7 +717,9 @@ struct rq {
>
>         struct cfs_rq cfs;
>         struct rt_rq rt;
> +#ifdef CONFIG_SCHED_DL
>         struct dl_rq dl;
> +#endif
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>         /* list of leaf cfs_rq on this cpu: */
> @@ -1472,9 +1497,12 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
>
>  #ifdef CONFIG_SMP
>  #define sched_class_highest (&stop_sched_class)
> -#else
> +#elif defined(CONFIG_SCHED_DL)
>  #define sched_class_highest (&dl_sched_class)
> +#else
> +#define sched_class_highest (&rt_sched_class)
>  #endif
> +
>  #define for_each_class(class) \
>     for (class = sched_class_highest; class; class = class->next)
>
> @@ -1525,7 +1553,6 @@ extern void sysrq_sched_debug_show(void);
>  extern void sched_init_granularity(void);
>  extern void update_max_interval(void);
>
> -extern void init_sched_dl_class(void);
>  extern void init_sched_rt_class(void);
>  extern void init_sched_fair_class(void);
>
> @@ -1535,8 +1562,6 @@ extern void resched_cpu(int cpu);
>  extern struct rt_bandwidth def_rt_bandwidth;
>  extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
>
> -extern struct dl_bandwidth def_dl_bandwidth;
> -extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
>  extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
>  extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
>  extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
> @@ -1969,7 +1994,6 @@ print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
>
>  extern void init_cfs_rq(struct cfs_rq *cfs_rq);
>  extern void init_rt_rq(struct rt_rq *rt_rq);
> -extern void init_dl_rq(struct dl_rq *dl_rq);
>
>  extern void cfs_bandwidth_usage_inc(void);
>  extern void cfs_bandwidth_usage_dec(void);
> @@ -1988,7 +2012,11 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
>  #endif
>
>
> -#ifdef CONFIG_SMP
> +#if !defined(CONFIG_SCHED_DL)
> +void __dl_update(struct dl_bw *dl_b, s64 bw)
> +{
> +}
> +#elif defined(CONFIG_SMP)
>  static inline
>  void __dl_update(struct dl_bw *dl_b, s64 bw)
>  {
> diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
> index 9f69fb6308..5632dc3e63 100644
> --- a/kernel/sched/stop_task.c
> +++ b/kernel/sched/stop_task.c
> @@ -110,7 +110,11 @@ static void update_curr_stop(struct rq *rq)
>   * Simple, special scheduling class for the per-CPU stop tasks:
>   */
>  const struct sched_class stop_sched_class = {
> +#ifdef CONFIG_SCHED_DL
>         .next                   = &dl_sched_class,
> +#else
> +       .next                   = &rt_sched_class,
> +#endif
>
>         .enqueue_task           = enqueue_task_stop,
>         .dequeue_task           = dequeue_task_stop,

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ