linux-kernel - Re: [RFC PATCH 2/4] Scheduler time extention

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2d059175-cfc0-fee2-5bc1-fb750aa5acf9@amd.com>
Date: Wed, 13 Nov 2024 09:27:51 +0530
From: K Prateek Nayak <kprateek.nayak@....com>
To: Prakash Sangappa <prakash.sangappa@...cle.com>,
	<linux-kernel@...r.kernel.org>
CC: <rostedt@...dmis.org>, <peterz@...radead.org>, <tglx@...utronix.de>,
	<daniel.m.jordan@...cle.com>
Subject: Re: [RFC PATCH 2/4] Scheduler time extention

Hello Prakash,

Full disclaimer: I haven't looked closely at the complete series but ...

On 11/13/2024 5:31 AM, Prakash Sangappa wrote:
> [..snip..]
> @@ -99,8 +100,12 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
>   
>   		local_irq_enable_exit_to_user(ti_work);
>   
> -		if (ti_work & _TIF_NEED_RESCHED)
> -			schedule();
> +		if (ti_work & _TIF_NEED_RESCHED) {
> +			if (irq && taskshrd_delay_resched())
> +				clear_tsk_need_resched(current);

Suppose the current task has requested a delayed resched, but an RT
task's wakeup then sets the TIF_NEED_RESCHED flag via an IPI. Doesn't this
clear the flag indiscriminately and allow the current task to keep running
for an extended amount of time? Am I missing something?

> +			else
> +				schedule();
> +		}
>   
>   		if (ti_work & _TIF_UPROBE)
>   			uprobe_notify_resume(regs);
> @@ -208,7 +213,7 @@ static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *reg
>   {
>   	syscall_exit_to_user_mode_prepare(regs);
>   	local_irq_disable_exit_to_user();
> -	exit_to_user_mode_prepare(regs);
> +	exit_to_user_mode_prepare(regs, false);
>   }
>   
>   void syscall_exit_to_user_mode_work(struct pt_regs *regs)
> @@ -232,7 +237,7 @@ noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
>   noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
>   {
>   	instrumentation_begin();
> -	exit_to_user_mode_prepare(regs);
> +	exit_to_user_mode_prepare(regs, true);
>   	instrumentation_end();
>   	exit_to_user_mode();
>   }
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 71b6396db118..713c43491403 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -815,6 +815,7 @@ void update_rq_clock(struct rq *rq)
>   
>   static void hrtick_clear(struct rq *rq)
>   {
> +	taskshrd_delay_resched_tick();
>   	if (hrtimer_active(&rq->hrtick_timer))
>   		hrtimer_cancel(&rq->hrtick_timer);
>   }
> @@ -830,6 +831,8 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
>   
>   	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
>   
> +	taskshrd_delay_resched_tick();
> +
>   	rq_lock(rq, &rf);
>   	update_rq_clock(rq);
>   	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
> @@ -903,6 +906,16 @@ void hrtick_start(struct rq *rq, u64 delay)
>   
>   #endif /* CONFIG_SMP */
>   
> +void hrtick_local_start(u64 delay)
> +{
> +	struct rq *rq = this_rq();
> +	struct rq_flags rf;
> +
> +	rq_lock(rq, &rf);

You can use guard(rq_lock)(rq) and avoid declaring rf.

> +	hrtick_start(rq, delay);
> +	rq_unlock(rq, &rf);
> +}
> +
>   static void hrtick_rq_init(struct rq *rq)
>   {
>   #ifdef CONFIG_SMP
> @@ -6645,6 +6658,9 @@ static void __sched notrace __schedule(int sched_mode)
>   picked:
>   	clear_tsk_need_resched(prev);
>   	clear_preempt_need_resched();
> +#ifdef CONFIG_TASKSHARED
> +	prev->taskshrd_sched_delay = 0;
> +#endif
>   #ifdef CONFIG_SCHED_DEBUG
>   	rq->last_seen_need_resched_ns = 0;
>   #endif
> diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
> index d23c34b8b3eb..0904667924d8 100644
> --- a/kernel/sched/syscalls.c
> +++ b/kernel/sched/syscalls.c
> @@ -1419,6 +1419,13 @@ static void do_sched_yield(void)
>    */
>   SYSCALL_DEFINE0(sched_yield)
>   {
> +
> +#ifdef  CONFIG_TASKSHARED
> +	if (current->taskshrd_sched_delay) {
> +		schedule();
> +		return 0;
> +	}
> +#endif
>   	do_sched_yield();
>   	return 0;
>   }
> diff --git a/mm/task_shared.c b/mm/task_shared.c
> index cea45d913b91..575b335d6879 100644
> --- a/mm/task_shared.c
> +++ b/mm/task_shared.c
> @@ -268,6 +268,56 @@ static int task_ushared_alloc(void)
>   	return ret;
>   }
>   
> +bool taskshrd_delay_resched(void)
> +{
> +	struct task_struct *t = current;
> +	struct task_ushrd_struct *shrdp = t->task_ushrd;
> +
> +	if (!IS_ENABLED(CONFIG_SCHED_HRTICK))
> +		return false;
> +
> +	if(shrdp == NULL || shrdp->kaddr == NULL)
> +		return false;
> +
> +	if (t->taskshrd_sched_delay)
> +		return false;
> +
> +	if (!(shrdp->kaddr->ts.sched_delay))
> +		return false;
> +
> +	shrdp->kaddr->ts.sched_delay = 0;
> +	t->taskshrd_sched_delay = 1;
> +
> +	return true;

Perhaps this also needs to check
"rq->nr_running == rq->cfs.h_nr_running", since I believe it only makes
sense for fair tasks to request that extra slice?

-- 
Thanks and Regards,
Prateek

> +}
> +
> +void taskshrd_delay_resched_fini(void)
> +{
> +#ifdef CONFIG_SCHED_HRTICK
> +	struct task_struct *t = current;
> +	/*
> +	* IRQs off, guaranteed to return to userspace, start timer on this CPU
> +	* to limit the resched-overdraft.
> +	*
> +	* If your critical section is longer than 50 us you get to keep the
> +	* pieces.
> +	*/
> +	if (t->taskshrd_sched_delay)
> +		hrtick_local_start(50 * NSEC_PER_USEC);
> +#endif
> +}
> +
> +void taskshrd_delay_resched_tick(void)
> +{
> +#ifdef CONFIG_SCHED_HRTICK
> +	struct task_struct *t = current;
> +
> +	if (t->taskshrd_sched_delay) {
> +		set_tsk_need_resched(t);
> +	}
> +#endif
> +}
> +
>   
>   /*
>    * Get Task Shared structure, allocate if needed and return mapped user address.