Message-ID: <alpine.LFD.2.20.1708262154150.14288@knanqh.ubzr>
Date: Sat, 26 Aug 2017 21:56:07 -0400 (EDT)
From: Nicolas Pitre <nicolas.pitre@...aro.org>
To: Ingo Molnar <mingo@...nel.org>
cc: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] sched/deadline: make it configurable

Ping.

On Thu, 17 Aug 2017, Nicolas Pitre wrote:
> Ping.
>
> On Thu, 3 Aug 2017, Nicolas Pitre wrote:
>
> > On most small systems, the deadline scheduler class is a luxury that
> > rarely gets used, if at all. It is preferable to have the ability to
> > configure it out to reduce the kernel size in that case.
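> >
> > With the option below, a small-system build with CONFIG_EXPERT=y can
> > then simply carry
> >
> >     # CONFIG_SCHED_DL is not set
> >
> > in its .config and the whole scheduling class is compiled out.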
> >
> > Before:
> >
> > $ size -t kernel/sched/built-in.o
> >    text    data     bss     dec     hex filename
> > [...]
> >   24435    3452     108   27995    6d5b (TOTALS)
> >
> > With CONFIG_SCHED_DL=n:
> >
> > $ size -t kernel/sched/built-in.o
> >    text    data     bss     dec     hex filename
> > [...]
> >   18336    3388      92   21816    5538 (TOTALS)
> >
> > Signed-off-by: Nicolas Pitre <nico@...aro.org>
> > ---
> >
> > Changes from v1:
> >
> > - fix for a compilation error found by kbuild test robot
> >
> >  include/linux/sched.h          |  2 ++
> >  include/linux/sched/deadline.h |  8 +++++++-
> >  init/Kconfig                   |  8 ++++++++
> >  kernel/locking/rtmutex.c       |  6 +++---
> >  kernel/sched/Makefile          |  5 +++--
> >  kernel/sched/core.c            | 15 ++++++++------
> >  kernel/sched/cpudeadline.h     |  7 ++++++-
> >  kernel/sched/debug.c           |  4 ++++
> >  kernel/sched/rt.c              | 13 ++++++++-----
> >  kernel/sched/sched.h           | 44 ++++++++++++++++++++++++++++++++++--------
> >  kernel/sched/stop_task.c       |  4 ++++
> >  11 files changed, 90 insertions(+), 26 deletions(-)
> >
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 8337e2db0b..5240f8c0d3 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -565,7 +565,9 @@ struct task_struct {
> > #ifdef CONFIG_CGROUP_SCHED
> > struct task_group *sched_task_group;
> > #endif
> > +#ifdef CONFIG_SCHED_DL
> > struct sched_dl_entity dl;
> > +#endif
> >
> > #ifdef CONFIG_PREEMPT_NOTIFIERS
> > /* List of struct preempt_notifier: */
> > diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
> > index 975be862e0..8f191a17dd 100644
> > --- a/include/linux/sched/deadline.h
> > +++ b/include/linux/sched/deadline.h
> > @@ -13,7 +13,7 @@
> >
> > static inline int dl_prio(int prio)
> > {
> > - if (unlikely(prio < MAX_DL_PRIO))
> > + if (IS_ENABLED(CONFIG_SCHED_DL) && unlikely(prio < MAX_DL_PRIO))
> > return 1;
> > return 0;
> > }
> > @@ -28,4 +28,10 @@ static inline bool dl_time_before(u64 a, u64 b)
> > return (s64)(a - b) < 0;
> > }
> >
> > +#ifdef CONFIG_SCHED_DL
> > +#define dl_deadline(tsk) (tsk)->dl.deadline
> > +#else
> > +#define dl_deadline(tsk) 0
> > +#endif
> > +
> > #endif /* _LINUX_SCHED_DEADLINE_H */
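> >
> > [Note: IS_ENABLED(CONFIG_SCHED_DL) evaluates to a compile-time 0 or 1,
> > so with CONFIG_SCHED_DL=n dl_prio() folds to a constant 0 and every
> > branch guarded by it becomes dead code. A minimal user-space sketch of
> > the same pattern (the kernel's IS_ENABLED() is more involved, but the
> > net effect is the same; DL_ENABLED is just a stand-in here):
> >
> >     #include <stdio.h>
> >
> >     #define DL_ENABLED 0            /* stand-in for CONFIG_SCHED_DL=n */
> >     #define MAX_DL_PRIO 0
> >
> >     static inline int dl_prio(int prio)
> >     {
> >             /* constant-folds to 0; callers' dl branches die */
> >             return DL_ENABLED && prio < MAX_DL_PRIO;
> >     }
> >
> >     int main(void)
> >     {
> >             if (dl_prio(-1))
> >                     puts("deadline task");  /* eliminated at build time */
> >             else
> >                     puts("deadline class disabled");
> >             return 0;
> >     }
> >
> > Identifiers used inside such dead branches must still be visible to the
> > compiler, which is why the dl_deadline() accessor above expands to a
> > constant 0 instead of touching the now-absent p->dl member.]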
> > diff --git a/init/Kconfig b/init/Kconfig
> > index 5f0ef850e8..8a430de9f4 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -959,6 +959,14 @@ config SCHED_AUTOGROUP
> > desktop applications. Task group autogeneration is currently based
> > upon task session.
> >
> > +config SCHED_DL
> > + bool "Deadline Task Scheduling" if EXPERT
> > + default y
> > + help
> > + This adds the sched_dl scheduling class to the kernel, providing
> > + support for the SCHED_DEADLINE policy. You might want to disable
> > + this to reduce the kernel size. If unsure, say Y.
> > +
> > config SYSFS_DEPRECATED
> > bool "Enable deprecated sysfs features to support old userspace tools"
> > depends on SYSFS
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 649dc9d395..ebc7c3a610 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -228,7 +228,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
> > * Only use with rt_mutex_waiter_{less,equal}()
> > */
> > #define task_to_waiter(p) \
> > - &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
> > + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = dl_deadline(p) }
> >
> > static inline int
> > rt_mutex_waiter_less(struct rt_mutex_waiter *left,
> > @@ -692,7 +692,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
> > * the values of the node being removed.
> > */
> > waiter->prio = task->prio;
> > - waiter->deadline = task->dl.deadline;
> > + waiter->deadline = dl_deadline(task);
> >
> > rt_mutex_enqueue(lock, waiter);
> >
> > @@ -966,7 +966,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
> > waiter->task = task;
> > waiter->lock = lock;
> > waiter->prio = task->prio;
> > - waiter->deadline = task->dl.deadline;
> > + waiter->deadline = dl_deadline(task);
> >
> > /* Get the top priority waiter on the lock */
> > if (rt_mutex_has_waiters(lock))
> > diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> > index 53f0164ed3..6ba5664cfb 100644
> > --- a/kernel/sched/Makefile
> > +++ b/kernel/sched/Makefile
> > @@ -16,9 +16,10 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
> > endif
> >
> > obj-y += core.o loadavg.o clock.o cputime.o
> > -obj-y += idle_task.o fair.o rt.o deadline.o
> > +obj-y += idle_task.o fair.o rt.o
> > obj-y += wait.o wait_bit.o swait.o completion.o idle.o
> > -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
> > +obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
> > +obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
> > obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
> > obj-$(CONFIG_SCHEDSTATS) += stats.o
> > obj-$(CONFIG_SCHED_DEBUG) += debug.o
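> >
> > [Note: GNU make's $(if condition,then-part) expands to the then-part
> > only when the condition is non-empty. CONFIG_SMP is "y" on SMP builds
> > and empty otherwise, so the deadline line above reduces to
> >
> >     obj-$(CONFIG_SCHED_DL) += deadline.o cpudeadline.o
> >
> > on SMP and to just deadline.o on UP: cpudeadline.o is now built only
> > when both CONFIG_SCHED_DL and CONFIG_SMP are set.]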
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 0869b20fba..2fc053f06a 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -636,7 +636,7 @@ bool sched_can_stop_tick(struct rq *rq)
> > int fifo_nr_running;
> >
> > /* Deadline tasks, even if single, need the tick */
> > - if (rq->dl.dl_nr_running)
> > + if (dl_nr_running(rq))
> > return false;
> >
> > /*
> > @@ -2166,10 +2166,12 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
> > memset(&p->se.statistics, 0, sizeof(p->se.statistics));
> > #endif
> >
> > +#ifdef CONFIG_SCHED_DL
> > RB_CLEAR_NODE(&p->dl.rb_node);
> > init_dl_task_timer(&p->dl);
> > init_dl_inactive_task_timer(&p->dl);
> > __dl_clear_params(p);
> > +#endif
> >
> > INIT_LIST_HEAD(&p->rt.run_list);
> > p->rt.timeout = 0;
> > @@ -3695,20 +3697,20 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
> > if (dl_prio(prio)) {
> > if (!dl_prio(p->normal_prio) ||
> > (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
> > - p->dl.dl_boosted = 1;
> > + dl_boosted(p) = 1;
> > queue_flag |= ENQUEUE_REPLENISH;
> > } else
> > - p->dl.dl_boosted = 0;
> > + dl_boosted(p) = 0;
> > p->sched_class = &dl_sched_class;
> > } else if (rt_prio(prio)) {
> > if (dl_prio(oldprio))
> > - p->dl.dl_boosted = 0;
> > + dl_boosted(p) = 0;
> > if (oldprio < prio)
> > queue_flag |= ENQUEUE_HEAD;
> > p->sched_class = &rt_sched_class;
> > } else {
> > if (dl_prio(oldprio))
> > - p->dl.dl_boosted = 0;
> > + dl_boosted(p) = 0;
> > if (rt_prio(oldprio))
> > p->rt.timeout = 0;
> > p->sched_class = &fair_sched_class;
> > @@ -5260,7 +5262,8 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
> > if (!cpumask_weight(cur))
> > return ret;
> >
> > - ret = dl_cpuset_cpumask_can_shrink(cur, trial);
> > + if (IS_ENABLED(CONFIG_SCHED_DL))
> > + ret = dl_cpuset_cpumask_can_shrink(cur, trial);
> >
> > return ret;
> > }
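> >
> > [Note: with CONFIG_SCHED_DL=n the dl_boosted() lvalue expands to
> > (*(int *)0), which looks alarming, but the working assumption is that
> > every such assignment sits behind a dl_prio() test that has already
> > folded to 0, so what the compiler effectively sees is
> >
> >     if (0)
> >             (*(int *)0) = 1;    /* provably dead, never emitted */
> >
> > and no null store survives into the object code. The stub exists only
> > to keep the disabled branches type-correct.]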
> > diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
> > index f7da8c55bb..5f4c10f837 100644
> > --- a/kernel/sched/cpudeadline.h
> > +++ b/kernel/sched/cpudeadline.h
> > @@ -25,10 +25,15 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
> > struct cpumask *later_mask);
> > void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
> > void cpudl_clear(struct cpudl *cp, int cpu);
> > -int cpudl_init(struct cpudl *cp);
> > void cpudl_set_freecpu(struct cpudl *cp, int cpu);
> > void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
> > +#ifdef CONFIG_SCHED_DL
> > +int cpudl_init(struct cpudl *cp);
> > void cpudl_cleanup(struct cpudl *cp);
> > +#else
> > +#define cpudl_init(cp) 0
> > +#define cpudl_cleanup(cp) do { } while (0)
> > +#endif
> > #endif /* CONFIG_SMP */
> >
> > #endif /* _LINUX_CPUDL_H */
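> >
> > [Note: the do { } while (0) form keeps the no-op stub statement-like,
> > so a caller such as
> >
> >     if (err)
> >             cpudl_cleanup(cp);
> >     else
> >             cpudl_set_freecpu(cp, cpu);
> >
> > parses identically whether CONFIG_SCHED_DL is set or not; a bare {}
> > would leave a stray semicolon and break the else.]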
> > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> > index 4fa66de52b..f144d50da4 100644
> > --- a/kernel/sched/debug.c
> > +++ b/kernel/sched/debug.c
> > @@ -659,7 +659,9 @@ do { \
> > spin_lock_irqsave(&sched_debug_lock, flags);
> > print_cfs_stats(m, cpu);
> > print_rt_stats(m, cpu);
> > +#ifdef CONFIG_SCHED_DL
> > print_dl_stats(m, cpu);
> > +#endif
> >
> > print_rq(m, rq, cpu);
> > spin_unlock_irqrestore(&sched_debug_lock, flags);
> > @@ -967,10 +969,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
> > #endif
> > P(policy);
> > P(prio);
> > +#ifdef CONFIG_SCHED_DL
> > if (p->policy == SCHED_DEADLINE) {
> > P(dl.runtime);
> > P(dl.deadline);
> > }
> > +#endif
> > #undef PN_SCHEDSTAT
> > #undef PN
> > #undef __PN
> > diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> > index 45caf937ef..d48e75a4a8 100644
> > --- a/kernel/sched/rt.c
> > +++ b/kernel/sched/rt.c
> > @@ -1556,7 +1556,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > * to re-start task selection.
> > */
> > if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
> > - rq->dl.dl_nr_running))
> > + dl_nr_running(rq)))
> > return RETRY_TASK;
> > }
> >
> > @@ -2716,16 +2716,19 @@ int sched_rt_handler(struct ctl_table *table, int write,
> > if (ret)
> > goto undo;
> >
> > - ret = sched_dl_global_validate();
> > - if (ret)
> > - goto undo;
> > + if (IS_ENABLED(CONFIG_SCHED_DL)) {
> > + ret = sched_dl_global_validate();
> > + if (ret)
> > + goto undo;
> > + }
> >
> > ret = sched_rt_global_constraints();
> > if (ret)
> > goto undo;
> >
> > sched_rt_do_global();
> > - sched_dl_do_global();
> > + if (IS_ENABLED(CONFIG_SCHED_DL))
> > + sched_dl_do_global();
> > }
> > if (0) {
> > undo:
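> >
> > [Note: for the IS_ENABLED() form to compile, sched_dl_global_validate()
> > and sched_dl_do_global() must remain declared even when deadline.o is
> > not built; their extern declarations are accordingly left outside the
> > new #ifdef in sched.h below, and the calls themselves are discarded
> > before link time.]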
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index eeef1a3086..33a3391943 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -137,7 +137,7 @@ static inline int rt_policy(int policy)
> >
> > static inline int dl_policy(int policy)
> > {
> > - return policy == SCHED_DEADLINE;
> > + return IS_ENABLED(CONFIG_SCHED_DL) && policy == SCHED_DEADLINE;
> > }
> > static inline bool valid_policy(int policy)
> > {
> > @@ -158,11 +158,15 @@ static inline int task_has_dl_policy(struct task_struct *p)
> > /*
> > * Tells if entity @a should preempt entity @b.
> > */
> > +#ifdef CONFIG_SCHED_DL
> > static inline bool
> > dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
> > {
> > return dl_time_before(a->deadline, b->deadline);
> > }
> > +#else
> > +#define dl_entity_preempt(a, b) false
> > +#endif
> >
> > /*
> > * This is the priority-queue data structure of the RT scheduling class:
> > @@ -247,7 +251,6 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
> > }
> >
> > void dl_change_utilization(struct task_struct *p, u64 new_bw);
> > -extern void init_dl_bw(struct dl_bw *dl_b);
> > extern int sched_dl_global_validate(void);
> > extern void sched_dl_do_global(void);
> > extern int sched_dl_overflow(struct task_struct *p, int policy,
> > @@ -261,7 +264,27 @@ extern int dl_task_can_attach(struct task_struct *p,
> > const struct cpumask *cs_cpus_allowed);
> > extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
> > const struct cpumask *trial);
> > +extern struct dl_bandwidth def_dl_bandwidth;
> > +
> > +struct dl_rq;
> > +
> > +#ifdef CONFIG_SCHED_DL
> > +#define dl_nr_running(rq) (rq)->dl.dl_nr_running
> > +#define dl_boosted(tsk) (tsk)->dl.dl_boosted
> > extern bool dl_cpu_busy(unsigned int cpu);
> > +extern void init_dl_bw(struct dl_bw *dl_b);
> > +extern void init_sched_dl_class(void);
> > +extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
> > +extern void init_dl_rq(struct dl_rq *dl_rq);
> > +#else
> > +#define dl_nr_running(rq) 0
> > +#define dl_boosted(tsk) (*(int *)0)
> > +#define dl_cpu_busy(cpu) false
> > +#define init_dl_bw(dl_b) do { } while (0)
> > +#define init_sched_dl_class() do { } while (0)
> > +#define init_dl_bandwidth(...) do { } while (0)
> > +#define init_dl_rq(dl_rq) do { } while (0)
> > +#endif
> >
> > #ifdef CONFIG_CGROUP_SCHED
> >
> > @@ -697,7 +720,9 @@ struct rq {
> >
> > struct cfs_rq cfs;
> > struct rt_rq rt;
> > +#ifdef CONFIG_SCHED_DL
> > struct dl_rq dl;
> > +#endif
> >
> > #ifdef CONFIG_FAIR_GROUP_SCHED
> > /* list of leaf cfs_rq on this cpu: */
> > @@ -1471,9 +1496,12 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
> >
> > #ifdef CONFIG_SMP
> > #define sched_class_highest (&stop_sched_class)
> > -#else
> > +#elif defined(CONFIG_SCHED_DL)
> > #define sched_class_highest (&dl_sched_class)
> > +#else
> > +#define sched_class_highest (&rt_sched_class)
> > #endif
> > +
> > #define for_each_class(class) \
> > for (class = sched_class_highest; class; class = class->next)
> >
> > @@ -1524,7 +1552,6 @@ extern void sysrq_sched_debug_show(void);
> > extern void sched_init_granularity(void);
> > extern void update_max_interval(void);
> >
> > -extern void init_sched_dl_class(void);
> > extern void init_sched_rt_class(void);
> > extern void init_sched_fair_class(void);
> >
> > @@ -1534,8 +1561,6 @@ extern void resched_cpu(int cpu);
> > extern struct rt_bandwidth def_rt_bandwidth;
> > extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
> >
> > -extern struct dl_bandwidth def_dl_bandwidth;
> > -extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
> > extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
> > extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
> > extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
> > @@ -1966,7 +1991,6 @@ print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
> >
> > extern void init_cfs_rq(struct cfs_rq *cfs_rq);
> > extern void init_rt_rq(struct rt_rq *rt_rq);
> > -extern void init_dl_rq(struct dl_rq *dl_rq);
> >
> > extern void cfs_bandwidth_usage_inc(void);
> > extern void cfs_bandwidth_usage_dec(void);
> > @@ -1985,7 +2009,11 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
> > #endif
> >
> >
> > -#ifdef CONFIG_SMP
> > +#if !defined(CONFIG_SCHED_DL)
> > +static inline void __dl_update(struct dl_bw *dl_b, s64 bw)
> > +{
> > +}
> > +#elif defined(CONFIG_SMP)
> > static inline
> > void __dl_update(struct dl_bw *dl_b, s64 bw)
> > {
> > diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
> > index 9f69fb6308..5632dc3e63 100644
> > --- a/kernel/sched/stop_task.c
> > +++ b/kernel/sched/stop_task.c
> > @@ -110,7 +110,11 @@ static void update_curr_stop(struct rq *rq)
> > * Simple, special scheduling class for the per-CPU stop tasks:
> > */
> > const struct sched_class stop_sched_class = {
> > +#ifdef CONFIG_SCHED_DL
> > .next = &dl_sched_class,
> > +#else
> > + .next = &rt_sched_class,
> > +#endif
> >
> > .enqueue_task = enqueue_task_stop,
> > .dequeue_task = dequeue_task_stop,
> >
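> > [Note: the scheduling classes form a singly linked priority chain that
> > the core walks via
> >
> >     for (class = sched_class_highest; class; class = class->next)
> >
> > Normally that chain is stop -> dl -> rt -> fair -> idle. With
> > CONFIG_SCHED_DL=n, the .next change above together with the
> > sched_class_highest fallback in sched.h splices dl out, giving
> > stop -> rt -> fair -> idle on SMP and rt -> fair -> idle on UP.]
> >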
>