lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <8faca6ec-26fe-4da5-b632-6562c519f7fe@paulmck-laptop>
Date: Thu, 3 Oct 2024 12:12:14 -0700
From: "Paul E. McKenney" <paulmck@...nel.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: vschneid@...hat.com, linux-kernel@...r.kernel.org, sfr@...b.auug.org.au,
	linux-next@...r.kernel.org, kernel-team@...a.com
Subject: Re: [BUG almost bisected] Splat in dequeue_rt_stack() and build error

On Thu, Oct 03, 2024 at 08:50:37PM +0200, Peter Zijlstra wrote:
> On Thu, Oct 03, 2024 at 09:04:30AM -0700, Paul E. McKenney wrote:
> > On Thu, Oct 03, 2024 at 04:22:40PM +0200, Peter Zijlstra wrote:
> > > On Thu, Oct 03, 2024 at 05:45:47AM -0700, Paul E. McKenney wrote:
> > > 
> > > > I ran 100*TREE03 for 18 hours each, and got 23 instances of *something*
> > > > happening (and I need to suppress stalls on the repeat).  One of the
> > > > earlier bugs happened early, but sadly not this one.
> > > 
> > > Damn, I don't have the amount of CPU hours available you mention in your
> > > later email. I'll just go up the rounds to 20 minutes and see if
> > > something wants to go bang before I have to shut down the noise
> > > pollution for the day...
> > 
> > Indeed, this was one reason I was soliciting debug patches.  ;-)
> 
> Sooo... I was contemplating if something like the below might perhaps
> help some. It's a bit of a mess (I'll try and clean up if/when it
> actually proves to work), but it compiles and survives a handful of
> 1-minute runs.

Thank you very much!  I will give it a spin.

Unless you tell me otherwise, I will allow the current test to complete
(about 12 hours from now), collect any data from it, then start this one.

> I'll try and give it more runs tomorrow when I can power up the big
> machines again -- unless you've already told me it's crap by then :-)

18-hour runs here, so even if I immediately kill the old run and start the
new one, I won't know until 6AM Pacific Time on Friday at the earliest.  ;-)

							Thanx, Paul

> ---
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 43e453ab7e20..1fe850788195 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7010,20 +7010,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
>  }
>  EXPORT_SYMBOL(default_wake_function);
>  
> -void __setscheduler_prio(struct task_struct *p, int prio)
> +const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
>  {
>  	if (dl_prio(prio))
> -		p->sched_class = &dl_sched_class;
> -	else if (rt_prio(prio))
> -		p->sched_class = &rt_sched_class;
> +		return &dl_sched_class;
> +
> +	if (rt_prio(prio))
> +		return &rt_sched_class;
> +
>  #ifdef CONFIG_SCHED_CLASS_EXT
> -	else if (task_should_scx(p))
> -		p->sched_class = &ext_sched_class;
> +	if (task_should_scx(p))
> +		return &ext_sched_class;
>  #endif
> -	else
> -		p->sched_class = &fair_sched_class;
>  
> -	p->prio = prio;
> +	return &fair_sched_class;
>  }
>  
>  #ifdef CONFIG_RT_MUTEXES
> @@ -7069,7 +7069,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  {
>  	int prio, oldprio, queued, running, queue_flag =
>  		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
> -	const struct sched_class *prev_class;
> +	const struct sched_class *prev_class, *next_class;
>  	struct rq_flags rf;
>  	struct rq *rq;
>  
> @@ -7127,6 +7127,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  		queue_flag &= ~DEQUEUE_MOVE;
>  
>  	prev_class = p->sched_class;
> +	next_class = __setscheduler_class(p, prio);
> +
> +	if (prev_class != next_class && p->se.sched_delayed)
> +		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
> +
>  	queued = task_on_rq_queued(p);
>  	running = task_current(rq, p);
>  	if (queued)
> @@ -7164,7 +7169,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
>  			p->rt.timeout = 0;
>  	}
>  
> -	__setscheduler_prio(p, prio);
> +	p->sched_class = next_class;
> +	p->prio = prio;
> +
>  	check_class_changing(rq, p, prev_class);
>  
>  	if (queued)
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index ab497fafa7be..c157d4860a3b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -13177,22 +13177,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
>  static void switched_from_fair(struct rq *rq, struct task_struct *p)
>  {
>  	detach_task_cfs_rq(p);
> -	/*
> -	 * Since this is called after changing class, this is a little weird
> -	 * and we cannot use DEQUEUE_DELAYED.
> -	 */
> -	if (p->se.sched_delayed) {
> -		/* First, dequeue it from its new class' structures */
> -		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
> -		/*
> -		 * Now, clean up the fair_sched_class side of things
> -		 * related to sched_delayed being true and that wasn't done
> -		 * due to the generic dequeue not using DEQUEUE_DELAYED.
> -		 */
> -		finish_delayed_dequeue_entity(&p->se);
> -		p->se.rel_deadline = 0;
> -		__block_task(rq, p);
> -	}
>  }
>  
>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index b1c3588a8f00..fba524c81c63 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3797,7 +3797,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
>  
>  extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
>  extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
> -extern void __setscheduler_prio(struct task_struct *p, int prio);
> +extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
>  extern void set_load_weight(struct task_struct *p, bool update_load);
>  extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
>  extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
> diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
> index aa70beee9895..0470bcc3d204 100644
> --- a/kernel/sched/syscalls.c
> +++ b/kernel/sched/syscalls.c
> @@ -529,7 +529,7 @@ int __sched_setscheduler(struct task_struct *p,
>  {
>  	int oldpolicy = -1, policy = attr->sched_policy;
>  	int retval, oldprio, newprio, queued, running;
> -	const struct sched_class *prev_class;
> +	const struct sched_class *prev_class, *next_class;
>  	struct balance_callback *head;
>  	struct rq_flags rf;
>  	int reset_on_fork;
> @@ -706,6 +706,12 @@ int __sched_setscheduler(struct task_struct *p,
>  			queue_flags &= ~DEQUEUE_MOVE;
>  	}
>  
> +	prev_class = p->sched_class;
> +	next_class = __setscheduler_class(p, newprio);
> +
> +	if (prev_class != next_class && p->se.sched_delayed)
> +		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
> +
>  	queued = task_on_rq_queued(p);
>  	running = task_current(rq, p);
>  	if (queued)
> @@ -713,11 +719,10 @@ int __sched_setscheduler(struct task_struct *p,
>  	if (running)
>  		put_prev_task(rq, p);
>  
> -	prev_class = p->sched_class;
> -
>  	if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
>  		__setscheduler_params(p, attr);
> -		__setscheduler_prio(p, newprio);
> +		p->sched_class = next_class;
> +		p->prio = newprio;
>  	}
>  	__setscheduler_uclamp(p, attr);
>  	check_class_changing(rq, p, prev_class);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ