[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2d059175-cfc0-fee2-5bc1-fb750aa5acf9@amd.com>
Date: Wed, 13 Nov 2024 09:27:51 +0530
From: K Prateek Nayak <kprateek.nayak@....com>
To: Prakash Sangappa <prakash.sangappa@...cle.com>,
<linux-kernel@...r.kernel.org>
CC: <rostedt@...dmis.org>, <peterz@...radead.org>, <tglx@...utronix.de>,
<daniel.m.jordan@...cle.com>
Subject: Re: [RFC PATCH 2/4] Scheduler time extention
Hello Prakash,
Full disclaimer: I haven't looked closely at the complete series but ...
On 11/13/2024 5:31 AM, Prakash Sangappa wrote:
> [..snip..]
> @@ -99,8 +100,12 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
>
> local_irq_enable_exit_to_user(ti_work);
>
> - if (ti_work & _TIF_NEED_RESCHED)
> - schedule();
> + if (ti_work & _TIF_NEED_RESCHED) {
> + if (irq && taskshrd_delay_resched())
> + clear_tsk_need_resched(current);
Suppose the current task had requested a delayed resched, but an RT
task's wakeup then sets the TIF_NEED_RESCHED flag via an IPI. Doesn't this
clear the flag indiscriminately and allow the current task to keep running
for an extended amount of time, delaying the RT task? Am I missing something?
> + else
> + schedule();
> + }
>
> if (ti_work & _TIF_UPROBE)
> uprobe_notify_resume(regs);
> @@ -208,7 +213,7 @@ static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *reg
> {
> syscall_exit_to_user_mode_prepare(regs);
> local_irq_disable_exit_to_user();
> - exit_to_user_mode_prepare(regs);
> + exit_to_user_mode_prepare(regs, false);
> }
>
> void syscall_exit_to_user_mode_work(struct pt_regs *regs)
> @@ -232,7 +237,7 @@ noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
> noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
> {
> instrumentation_begin();
> - exit_to_user_mode_prepare(regs);
> + exit_to_user_mode_prepare(regs, true);
> instrumentation_end();
> exit_to_user_mode();
> }
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 71b6396db118..713c43491403 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -815,6 +815,7 @@ void update_rq_clock(struct rq *rq)
>
> static void hrtick_clear(struct rq *rq)
> {
> + taskshrd_delay_resched_tick();
> if (hrtimer_active(&rq->hrtick_timer))
> hrtimer_cancel(&rq->hrtick_timer);
> }
> @@ -830,6 +831,8 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
>
> WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
>
> + taskshrd_delay_resched_tick();
> +
> rq_lock(rq, &rf);
> update_rq_clock(rq);
> rq->curr->sched_class->task_tick(rq, rq->curr, 1);
> @@ -903,6 +906,16 @@ void hrtick_start(struct rq *rq, u64 delay)
>
> #endif /* CONFIG_SMP */
>
> +void hrtick_local_start(u64 delay)
> +{
> + struct rq *rq = this_rq();
> + struct rq_flags rf;
> +
> + rq_lock(rq, &rf);
You can use guard(rq_lock)(rq) here, which avoids declaring rf and the
explicit rq_unlock() below.
> + hrtick_start(rq, delay);
> + rq_unlock(rq, &rf);
> +}
> +
> static void hrtick_rq_init(struct rq *rq)
> {
> #ifdef CONFIG_SMP
> @@ -6645,6 +6658,9 @@ static void __sched notrace __schedule(int sched_mode)
> picked:
> clear_tsk_need_resched(prev);
> clear_preempt_need_resched();
> +#ifdef CONFIG_TASKSHARED
> + prev->taskshrd_sched_delay = 0;
> +#endif
> #ifdef CONFIG_SCHED_DEBUG
> rq->last_seen_need_resched_ns = 0;
> #endif
> diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
> index d23c34b8b3eb..0904667924d8 100644
> --- a/kernel/sched/syscalls.c
> +++ b/kernel/sched/syscalls.c
> @@ -1419,6 +1419,13 @@ static void do_sched_yield(void)
> */
> SYSCALL_DEFINE0(sched_yield)
> {
> +
> +#ifdef CONFIG_TASKSHARED
> + if (current->taskshrd_sched_delay) {
> + schedule();
> + return 0;
> + }
> +#endif
> do_sched_yield();
> return 0;
> }
> diff --git a/mm/task_shared.c b/mm/task_shared.c
> index cea45d913b91..575b335d6879 100644
> --- a/mm/task_shared.c
> +++ b/mm/task_shared.c
> @@ -268,6 +268,56 @@ static int task_ushared_alloc(void)
> return ret;
> }
>
> +bool taskshrd_delay_resched(void)
> +{
> + struct task_struct *t = current;
> + struct task_ushrd_struct *shrdp = t->task_ushrd;
> +
> + if (!IS_ENABLED(CONFIG_SCHED_HRTICK))
> + return false;
> +
> + if(shrdp == NULL || shrdp->kaddr == NULL)
> + return false;
> +
> + if (t->taskshrd_sched_delay)
> + return false;
> +
> + if (!(shrdp->kaddr->ts.sched_delay))
> + return false;
> +
> + shrdp->kaddr->ts.sched_delay = 0;
> + t->taskshrd_sched_delay = 1;
> +
> + return true;
Perhaps this also needs to check
"rq->nr_running == rq->cfs.h_nr_running", since I believe it only makes
sense for fair tasks to request that extra slice?
--
Thanks and Regards,
Prateek
> +}
> +
> +void taskshrd_delay_resched_fini(void)
> +{
> +#ifdef CONFIG_SCHED_HRTICK
> + struct task_struct *t = current;
> + /*
> + * IRQs off, guaranteed to return to userspace, start timer on this CPU
> + * to limit the resched-overdraft.
> + *
> + * If your critical section is longer than 50 us you get to keep the
> + * pieces.
> + */
> + if (t->taskshrd_sched_delay)
> + hrtick_local_start(50 * NSEC_PER_USEC);
> +#endif
> +}
> +
> +void taskshrd_delay_resched_tick(void)
> +{
> +#ifdef CONFIG_SCHED_HRTICK
> + struct task_struct *t = current;
> +
> + if (t->taskshrd_sched_delay) {
> + set_tsk_need_resched(t);
> + }
> +#endif
> +}
> +
>
> /*
> * Get Task Shared structure, allocate if needed and return mapped user address.
Powered by blists - more mailing lists