Message-ID: <e334aff9-248c-4a00-98e1-7bcb7cdd5e90@linux.ibm.com>
Date: Tue, 22 Oct 2024 22:14:41 +0530
From: Shrikanth Hegde <sshegde@...ux.ibm.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: linux-kernel@...r.kernel.org, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
vschneid@...hat.com, ankur.a.arora@...cle.com, efault@....de,
bigeasy@...utronix.de, tglx@...utronix.de, mingo@...nel.org
Subject: Re: [PATCH 2/5] sched: Add Lazy preemption model
On 10/7/24 13:16, Peter Zijlstra wrote:
> Change fair to use resched_curr_lazy(), which, when the lazy
> preemption model is selected, will set TIF_NEED_RESCHED_LAZY.
>
> This LAZY bit will be promoted to the full NEED_RESCHED bit on tick.
> As such, the average delay between setting LAZY and actually
> rescheduling will be TICK_NSEC/2.
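(So with HZ=1000 that would be ~0.5ms on average, and ~2ms with
HZ=250, IIUC.)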
>
> In short, Lazy preemption will delay preemption for fair class but
> will function as Full preemption for all the other classes, most
> notably the realtime (RR/FIFO/DEADLINE) classes.
>
> The goal is to bridge the performance gap with Voluntary, such that we
> might eventually remove that option entirely.
>
> Suggested-by: Thomas Gleixner <tglx@...utronix.de>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
> include/linux/preempt.h | 8 ++++-
> kernel/Kconfig.preempt | 15 +++++++++
> kernel/sched/core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++--
> kernel/sched/debug.c | 5 +--
> kernel/sched/fair.c | 6 +--
> kernel/sched/sched.h | 1
> 6 files changed, 103 insertions(+), 8 deletions(-)
>
> --- a/include/linux/preempt.h
> +++ b/include/linux/preempt.h
> @@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_dis
> extern bool preempt_model_none(void);
> extern bool preempt_model_voluntary(void);
> extern bool preempt_model_full(void);
> +extern bool preempt_model_lazy(void);
>
> #else
>
> @@ -502,6 +503,11 @@ static inline bool preempt_model_full(vo
> return IS_ENABLED(CONFIG_PREEMPT);
> }
>
> +static inline bool preempt_model_lazy(void)
> +{
> + return IS_ENABLED(CONFIG_PREEMPT_LAZY);
> +}
> +
> #endif
>
> static inline bool preempt_model_rt(void)
> @@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void
> */
> static inline bool preempt_model_preemptible(void)
> {
> - return preempt_model_full() || preempt_model_rt();
> + return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
> }
>
> #endif /* __LINUX_PREEMPT_H */
> --- a/kernel/Kconfig.preempt
> +++ b/kernel/Kconfig.preempt
> @@ -11,6 +11,9 @@ config PREEMPT_BUILD
> select PREEMPTION
> select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
>
> +config ARCH_HAS_PREEMPT_LAZY
> + bool
> +
> choice
> prompt "Preemption Model"
> default PREEMPT_NONE
> @@ -67,6 +70,18 @@ config PREEMPT
> embedded system with latency requirements in the milliseconds
> range.
>
> +config PREEMPT_LAZY
> + bool "Scheduler controlled preemption model"
> + depends on !ARCH_NO_PREEMPT
> + depends on ARCH_HAS_PREEMPT_LAZY
> + select PREEMPT_BUILD
> + help
> + This option provides a scheduler driven preemption model that
> + is fundamentally similar to full preemption, but is less
> + eager to preempt SCHED_NORMAL tasks in an attempt to
> + reduce lock holder preemption and recover some of the performance
> + gains seen from using Voluntary preemption.
> +
> config PREEMPT_RT
> bool "Fully Preemptible Kernel (Real-Time)"
> depends on EXPERT && ARCH_SUPPORTS_RT
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1078,6 +1078,9 @@ static void __resched_curr(struct rq *rq
>
> lockdep_assert_rq_held(rq);
>
> + if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY)
> + tif = TIF_NEED_RESCHED;
> +
> if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
> return;
>
> @@ -1103,6 +1106,32 @@ void resched_curr(struct rq *rq)
> __resched_curr(rq, TIF_NEED_RESCHED);
> }
>
> +#ifdef CONFIG_PREEMPT_DYNAMIC
> +static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
> +static __always_inline bool dynamic_preempt_lazy(void)
> +{
> + return static_branch_unlikely(&sk_dynamic_preempt_lazy);
> +}
> +#else
> +static __always_inline bool dynamic_preempt_lazy(void)
> +{
> + return IS_ENABLED(PREEMPT_LAZY);
I had to make it CONFIG_PREEMPT_LAZY for lazy preemption to work on
systems where CONFIG_PREEMPT_DYNAMIC=n. As written,
IS_ENABLED(PREEMPT_LAZY) always evaluates to false because the macro
name is missing the CONFIG_ prefix, so dynamic_preempt_lazy() never
returns true on those configs.
> +}
> +#endif
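To be explicit, the version I'm running with is:

	static __always_inline bool dynamic_preempt_lazy(void)
	{
		return IS_ENABLED(CONFIG_PREEMPT_LAZY);
	}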
> +
> +static __always_inline int tif_need_resched_lazy(void)
> +{
> + if (dynamic_preempt_lazy())
> + return TIF_NEED_RESCHED_LAZY;
> +
> + return TIF_NEED_RESCHED;
> +}
> +
> +void resched_curr_lazy(struct rq *rq)
> +{
> + __resched_curr(rq, tif_need_resched_lazy());
> +}
> +
> void resched_cpu(int cpu)
> {
> struct rq *rq = cpu_rq(cpu);
> @@ -5598,6 +5627,10 @@ void sched_tick(void)
> update_rq_clock(rq);
> hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
> update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
> +
> + if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
> + resched_curr(rq);
> +
> curr->sched_class->task_tick(rq, curr, 0);
> if (sched_feat(LATENCY_WARN))
> resched_latency = cpu_resched_latency(rq);
> @@ -7334,6 +7367,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule <- NOP
> * preempt_schedule_notrace <- NOP
> * irqentry_exit_cond_resched <- NOP
> + * dynamic_preempt_lazy <- false
> *
> * VOLUNTARY:
> * cond_resched <- __cond_resched
> @@ -7341,6 +7375,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule <- NOP
> * preempt_schedule_notrace <- NOP
> * irqentry_exit_cond_resched <- NOP
> + * dynamic_preempt_lazy <- false
> *
> * FULL:
> * cond_resched <- RET0
> @@ -7348,6 +7383,15 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule <- preempt_schedule
> * preempt_schedule_notrace <- preempt_schedule_notrace
> * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
> + * dynamic_preempt_lazy <- false
> + *
> + * LAZY:
> + * cond_resched <- RET0
> + * might_resched <- RET0
> + * preempt_schedule <- preempt_schedule
> + * preempt_schedule_notrace <- preempt_schedule_notrace
> + * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
> + * dynamic_preempt_lazy <- true
> */
>
> enum {
> @@ -7355,6 +7399,7 @@ enum {
> preempt_dynamic_none,
> preempt_dynamic_voluntary,
> preempt_dynamic_full,
> + preempt_dynamic_lazy,
> };
>
> int preempt_dynamic_mode = preempt_dynamic_undefined;
> @@ -7370,15 +7415,23 @@ int sched_dynamic_mode(const char *str)
> if (!strcmp(str, "full"))
> return preempt_dynamic_full;
>
> +#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
> + if (!strcmp(str, "lazy"))
> + return preempt_dynamic_lazy;
> +#endif
> +
> return -EINVAL;
> }
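(So with CONFIG_PREEMPT_DYNAMIC=y this can also be selected at
runtime, e.g. 'echo lazy > /sys/kernel/debug/sched/preempt', or at
boot with preempt=lazy, IIUC.)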
>
> +#define preempt_dynamic_key_enable(f) static_key_enable(&sk_dynamic_##f.key)
> +#define preempt_dynamic_key_disable(f) static_key_disable(&sk_dynamic_##f.key)
> +
> #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
> #define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
> #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
> -#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
> -#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
> +#define preempt_dynamic_enable(f) preempt_dynamic_key_enable(f)
> +#define preempt_dynamic_disable(f) preempt_dynamic_key_disable(f)
> #else
> #error "Unsupported PREEMPT_DYNAMIC mechanism"
> #endif
> @@ -7398,6 +7451,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_enable(preempt_schedule);
> preempt_dynamic_enable(preempt_schedule_notrace);
> preempt_dynamic_enable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_disable(preempt_lazy);
>
> switch (mode) {
> case preempt_dynamic_none:
> @@ -7407,6 +7461,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_disable(preempt_schedule);
> preempt_dynamic_disable(preempt_schedule_notrace);
> preempt_dynamic_disable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_disable(preempt_lazy);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: none\n");
> break;
> @@ -7418,6 +7473,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_disable(preempt_schedule);
> preempt_dynamic_disable(preempt_schedule_notrace);
> preempt_dynamic_disable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_disable(preempt_lazy);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: voluntary\n");
> break;
> @@ -7429,9 +7485,22 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_enable(preempt_schedule);
> preempt_dynamic_enable(preempt_schedule_notrace);
> preempt_dynamic_enable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_disable(preempt_lazy);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: full\n");
> break;
> +
> + case preempt_dynamic_lazy:
> + if (!klp_override)
> + preempt_dynamic_disable(cond_resched);
> + preempt_dynamic_disable(might_resched);
> + preempt_dynamic_enable(preempt_schedule);
> + preempt_dynamic_enable(preempt_schedule_notrace);
> + preempt_dynamic_enable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_enable(preempt_lazy);
> + if (mode != preempt_dynamic_mode)
> + pr_info("Dynamic Preempt: lazy\n");
> + break;
> }
>
> preempt_dynamic_mode = mode;
> @@ -7494,6 +7563,8 @@ static void __init preempt_dynamic_init(
> sched_dynamic_update(preempt_dynamic_none);
> } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
> sched_dynamic_update(preempt_dynamic_voluntary);
> + } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
> + sched_dynamic_update(preempt_dynamic_lazy);
> } else {
> /* Default static call setting, nothing to do */
> WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
> @@ -7514,6 +7585,7 @@ static void __init preempt_dynamic_init(
> PREEMPT_MODEL_ACCESSOR(none);
> PREEMPT_MODEL_ACCESSOR(voluntary);
> PREEMPT_MODEL_ACCESSOR(full);
> +PREEMPT_MODEL_ACCESSOR(lazy);
>
> #else /* !CONFIG_PREEMPT_DYNAMIC: */
>
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -245,11 +245,12 @@ static ssize_t sched_dynamic_write(struc
> static int sched_dynamic_show(struct seq_file *m, void *v)
> {
> static const char * preempt_modes[] = {
> - "none", "voluntary", "full"
> + "none", "voluntary", "full", "lazy",
> };
> + int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
> int i;
>
> - for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
> + for (i = 0; i < j; i++) {
> if (preempt_dynamic_mode == i)
> seq_puts(m, "(");
> seq_puts(m, preempt_modes[i]);
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1251,7 +1251,7 @@ static void update_curr(struct cfs_rq *c
> return;
>
> if (resched || did_preempt_short(cfs_rq, curr)) {
If there is a long running task, LAZY is set only once the task is no
longer eligible, and a subsequent tick upgrades it to NEED_RESCHED. So
if one sets sysctl_sched_base_slice to a large value (max 4 seconds),
LAZY may not be set until up to 4 seconds later if there is no wakeup
on that CPU.
For example, if I set sysctl_sched_base_slice=300ms and spawn 2
stress-ng workers on one CPU, the LAZY bit is usually set around 300ms
after sched_switch when there are no wakeups, and NEED_RESCHED is set
on the subsequent tick.
Initially I was thinking that for a long running process LAZY would be
set after one tick and NEED_RESCHED on the next. I was wrong: it can
take a long time for LAZY to be set, and only the tick after that sets
NEED_RESCHED.
Is that the expected behavior, on the assumption that whoever sets
sysctl_sched_base_slice knows what to expect?
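(IIUC that makes the worst-case delay for a CPU hog roughly
sysctl_sched_base_slice plus one tick: LAZY once the slice is consumed
and the task is no longer eligible, then NEED_RESCHED on the following
tick.)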
> - resched_curr(rq);
> + resched_curr_lazy(rq);
> clear_buddies(cfs_rq, curr);
> }
> }
> @@ -5677,7 +5677,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc
> * validating it and just reschedule.
> */
> if (queued) {
What is this 'queued' argument used for? hrtick seems to set it, but I
haven't understood how it works.
> - resched_curr(rq_of(cfs_rq));
> + resched_curr_lazy(rq_of(cfs_rq));
> return;
> }
> /*
> @@ -8832,7 +8832,7 @@ static void check_preempt_wakeup_fair(st
> return;
>
> preempt:
> - resched_curr(rq);
Is it better to keep calling resched_curr() here? When the code
reaches this point, it wants to run pse as soon as possible, right?
> + resched_curr_lazy(rq);
> }
>
> static struct task_struct *pick_task_fair(struct rq *rq)
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2692,6 +2692,7 @@ extern void init_sched_rt_class(void);
> extern void init_sched_fair_class(void);
>
> extern void resched_curr(struct rq *rq);
> +extern void resched_curr_lazy(struct rq *rq);
> extern void resched_cpu(int cpu);
>
> extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
>
>
>