[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ddc80d60-0cea-4e07-a4ef-fb21d5f5a0fa@linux.ibm.com>
Date: Wed, 14 May 2025 16:28:43 +0530
From: Madadi Vineeth Reddy <vineethr@...ux.ibm.com>
To: Prakash Sangappa <prakash.sangappa@...cle.com>
Cc: peterz@...radead.org, rostedt@...dmis.org, mathieu.desnoyers@...icios.com,
tglx@...utronix.de, bigeasy@...utronix.de, kprateek.nayak@....com,
linux-kernel@...r.kernel.org,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>
Subject: Re: [PATCH V4 1/6] Sched: Scheduler time slice extension
Hi Prakash,
On 14/05/25 03:15, Prakash Sangappa wrote:
> Add support for a thread to request extending its execution time slice on
> the cpu. The extra cpu time granted would help in allowing the thread to
> complete executing the critical section and drop any locks without getting
> preempted. The thread would request this cpu time extension, by setting a
> bit in the restartable sequences(rseq) structure registered with the kernel.
>
> Kernel will grant a 30us extension on the cpu, when it sees the bit set.
> With the help of a timer, kernel force preempts the thread if it is still
> running on the cpu when the 30us timer expires. The thread should yield
> the cpu by making a system call after completing the critical section.
>
> Suggested-by: Peter Ziljstra <peterz@...radead.org>
> Signed-off-by: Prakash Sangappa <prakash.sangappa@...cle.com>
> ---
> include/linux/entry-common.h | 11 +++++--
> include/linux/sched.h | 16 +++++++++++
> include/uapi/linux/rseq.h | 7 +++++
> kernel/entry/common.c | 19 ++++++++----
> kernel/rseq.c | 56 ++++++++++++++++++++++++++++++++++++
> kernel/sched/core.c | 14 +++++++++
> kernel/sched/syscalls.c | 5 ++++
> 7 files changed, 120 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
> index fc61d0205c97..cec343f95210 100644
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -303,7 +303,8 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
> * exit_to_user_mode_loop - do any pending work before leaving to user space
> */
> unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> - unsigned long ti_work);
> + unsigned long ti_work,
> + bool irq);
>
> /**
> * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
> @@ -315,7 +316,8 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> * EXIT_TO_USER_MODE_WORK are set
> * 4) check that interrupts are still disabled
> */
> -static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
> +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs,
> + bool irq)
> {
> unsigned long ti_work;
>
> @@ -326,7 +328,10 @@ static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
>
> ti_work = read_thread_flags();
> if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
> - ti_work = exit_to_user_mode_loop(regs, ti_work);
> + ti_work = exit_to_user_mode_loop(regs, ti_work, irq);
> +
> + if (irq)
> + rseq_delay_resched_fini();
>
> arch_exit_to_user_mode_prepare(regs, ti_work);
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c08fd199be4e..14bf0508bfca 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -339,6 +339,7 @@ extern int __must_check io_schedule_prepare(void);
> extern void io_schedule_finish(int token);
> extern long io_schedule_timeout(long timeout);
> extern void io_schedule(void);
> +extern void hrtick_local_start(u64 delay);
>
> /* wrapper function to trace from this header file */
> DECLARE_TRACEPOINT(sched_set_state_tp);
> @@ -1044,6 +1045,7 @@ struct task_struct {
> /* delay due to memory thrashing */
> unsigned in_thrashing:1;
> #endif
> + unsigned sched_time_delay:1;
Can this be placed in #ifdef CONFIG_RSEQ?
> #ifdef CONFIG_PREEMPT_RT
> struct netdev_xmit net_xmit;
> #endif
[..snip..]
> diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
> index cd38f4e9899d..1b2b64fe0fb1 100644
> --- a/kernel/sched/syscalls.c
> +++ b/kernel/sched/syscalls.c
> @@ -1378,6 +1378,11 @@ static void do_sched_yield(void)
> */
> SYSCALL_DEFINE0(sched_yield)
> {
> + if (IS_ENABLED(CONFIG_RSEQ) && current->sched_time_delay) {
> + schedule();
> + return 0;
> + }
> +
> do_sched_yield();
> return 0;
> }
As mentioned in previous versions, does this not change the semantics for
sched_yield()? Why is this necessary to immediately call schedule() and skip
going through do_sched_yield()?
For a task if a delay is granted on CPU A, but the task migrates to CPU B before
the IRQ-return hook, the timer never fires and the thread might overrun its bonus?
Any thoughts on this?
Thanks,
Madadi Vineeth Reddy
Powered by blists - more mailing lists