lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 28 May 2015 19:59:20 +0300
From:	Kirill Tkhai <tkhai@...dex.ru>
To:	Peter Zijlstra <peterz@...radead.org>,
	Mike Galbraith <umgwanakikbuti@...il.com>
Cc:	LKML <linux-kernel@...r.kernel.org>, Ingo Molnar <mingo@...e.hu>,
	"ktkhai@...allels.com" <ktkhai@...allels.com>
Subject: Re: sched_setscheduler() vs idle_balance() race

В Чт, 28/05/2015 в 15:53 +0200, Peter Zijlstra пишет:
On Thu, May 28, 2015 at 09:43:52AM +0200, Mike Galbraith wrote:
> > Hi Peter,
> > 
> > I'm not seeing what prevents pull_task() from yanking a task out from
> > under __sched_setscheduler().  A box sprinkling smoldering 3.0 kernel
> > wreckage all over my bugzilla mbox isn't seeing it either ;-)
> 
> Say, how easy can that thing be reproduced?
> 
> The below is compile tested only, but it might just work if I didn't
> miss anything :-)
> 
> 
> ---
>  kernel/sched/core.c | 137 +++++++++++++++++++++++++++++-----------------------
>  1 file changed, 77 insertions(+), 60 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 4eec60757b16..28f1ddc0bef2 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1000,22 +1000,6 @@ inline int task_curr(const struct task_struct *p)
>  	return cpu_curr(task_cpu(p)) == p;
>  }
>  
> -/*
> - * Can drop rq->lock because from sched_class::switched_from() methods drop it.
> - */
> -static inline void check_class_changed(struct rq *rq, struct task_struct *p,
> -				       const struct sched_class *prev_class,
> -				       int oldprio)
> -{
> -	if (prev_class != p->sched_class) {
> -		if (prev_class->switched_from)
> -			prev_class->switched_from(rq, p);
> -		/* Possble rq->lock 'hole'.  */
> -		p->sched_class->switched_to(rq, p);
> -	} else if (oldprio != p->prio || dl_task(p))
> -		p->sched_class->prio_changed(rq, p, oldprio);
> -}
> -
>  void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
>  {
>  	const struct sched_class *class;
> @@ -3075,12 +3059,38 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  
>  	p->prio = prio;
>  
> +	if (prev_class != p->sched_class) {
> +		prev_class->switched_from(rq, p);
>
 
switched_from_XXX() doesn't depend on on_YYY_rq state, so, yeah, this should be safe.

> +		/*
> +		 * switched_from() is allowed to drop @rq->lock; which opens a
> +		 * race against load-balancing, however since @p is not
> +		 * currently enqueued it is invisible to the load-balancer.
> +		 *
> +		 * double check @p is still where we thought it was.
> +		 */
> +		WARN_ON_ONCE(task_rq(p) != rq);
> +	}
> +
>  	if (running)
>  		p->sched_class->set_curr_task(rq);
>  	if (queued)
>  		enqueue_task(rq, p, enqueue_flag);
>  
> -	check_class_changed(rq, p, prev_class, oldprio);
> +	/*
> +	 * Both switched_to() and prio_changed() are allowed to drop @rq->lock;
> +	 * which opens a race against load-balancing, and since @p is now
> +	 * enqueued it can indeed be subject to this.
> +	 *
> +	 * This means that any balancing done by these functions must double
> +	 * check a task's rq.
> +	 */
> +	if (prev_class != p->sched_class)
> +		p->sched_class->switched_to(rq, p);
> +	else if (oldprio != p->prio || dl_task(p))
> +		p->sched_class->prio_changed(rq, p, oldprio);
> +	/*
> +	 * It further means we should not rely on @p's rq from here on.
> +	 */
>  out_unlock:
>  	__task_rq_unlock(rq);
>  }
> @@ -3420,7 +3430,7 @@ static bool dl_param_changed(struct task_struct *p,
>  
>  static int __sched_setscheduler(struct task_struct *p,
>  				const struct sched_attr *attr,
> -				bool user)
> +				bool user, bool pi)
>  {
>  	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
>  		      MAX_RT_PRIO - 1 - attr->sched_priority;
> @@ -3606,18 +3616,20 @@ static int __sched_setscheduler(struct task_struct *p,
>  	p->sched_reset_on_fork = reset_on_fork;
>  	oldprio = p->prio;
>  
> -	/*
> -	 * Take priority boosted tasks into account. If the new
> -	 * effective priority is unchanged, we just store the new
> -	 * normal parameters and do not touch the scheduler class and
> -	 * the runqueue. This will be done when the task deboost
> -	 * itself.
> -	 */
> -	new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
> -	if (new_effective_prio == oldprio) {
> -		__setscheduler_params(p, attr);
> -		task_rq_unlock(rq, p, &flags);
> -		return 0;
> +	if (pi) {
> +		/*
> +		 * Take priority boosted tasks into account. If the new
> +		 * effective priority is unchanged, we just store the new
> +		 * normal parameters and do not touch the scheduler class and
> +		 * the runqueue. This will be done when the task deboost
> +		 * itself.
> +		 */
> +		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
> +		if (new_effective_prio == oldprio) {
> +			__setscheduler_params(p, attr);
> +			task_rq_unlock(rq, p, &flags);
> +			return 0;
> +		}
>  	}
>  
>  	queued = task_on_rq_queued(p);
> @@ -3628,7 +3640,19 @@ static int __sched_setscheduler(struct task_struct *p,
>  		put_prev_task(rq, p);
>  
>  	prev_class = p->sched_class;
> -	__setscheduler(rq, p, attr, true);
> +	__setscheduler(rq, p, attr, pi);
> +
> +	if (prev_class != p->sched_class) {
> +		prev_class->switched_from(rq, p);
> +		/*
> +		 * switched_from() is allowed to drop @rq->lock; which opens a
> +		 * race against load-balancing, however since @p is not
> +		 * currently enqueued it is invisible to the load-balancer.
> +		 *
> +		 * double check @p is still where we thought it was.
> +		 */
> +		WARN_ON_ONCE(task_rq(p) != rq);
> +	}
>  
>  	if (running)
>  		p->sched_class->set_curr_task(rq);
> @@ -3640,10 +3664,25 @@ static int __sched_setscheduler(struct task_struct *p,
>  		enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
>  	}
>  
> -	check_class_changed(rq, p, prev_class, oldprio);
> +	/*
> +	 * Both switched_to() and prio_changed() are allowed to drop @rq->lock;
> +	 * which opens a race against load-balancing, and since @p is now
> +	 * enqueued it can indeed be subject to this.
> +	 *
> +	 * This means that any balancing done by these functions must double
> +	 * check a task's rq.
> +	 */
> +	if (prev_class != p->sched_class)
> +		p->sched_class->switched_to(rq, p);
> +	else if (oldprio != p->prio || dl_task(p))
> +		p->sched_class->prio_changed(rq, p, oldprio);
> +	/*
> +	 * It further means we should not rely on @p's rq from here on.
> +	 */
>  	task_rq_unlock(rq, p, &flags);
>  
> -	rt_mutex_adjust_pi(p);
> +	if (pi)
> +		rt_mutex_adjust_pi(p);
>  
>  	return 0;
>  }
> @@ -3664,7 +3703,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
>  		attr.sched_policy = policy;
>  	}
>  
> -	return __sched_setscheduler(p, &attr, check);
> +	return __sched_setscheduler(p, &attr, check, true);
>  }
>  /**
>   * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
> @@ -3685,7 +3724,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
>  
>  int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
>  {
> -	return __sched_setscheduler(p, attr, true);
> +	return __sched_setscheduler(p, attr, true, true);
>  }
>  EXPORT_SYMBOL_GPL(sched_setattr);
>  
> @@ -7346,32 +7385,12 @@ EXPORT_SYMBOL(___might_sleep);
>  #endif
>  
>  #ifdef CONFIG_MAGIC_SYSRQ
> -static void normalize_task(struct rq *rq, struct task_struct *p)
> +void normalize_rt_tasks(void)
>  {
> -	const struct sched_class *prev_class = p->sched_class;
> +	struct task_struct *g, *p;
>  	struct sched_attr attr = {
>  		.sched_policy = SCHED_NORMAL,
>  	};
> -	int old_prio = p->prio;
> -	int queued;
> -
> -	queued = task_on_rq_queued(p);
> -	if (queued)
> -		dequeue_task(rq, p, 0);
> -	__setscheduler(rq, p, &attr, false);
> -	if (queued) {
> -		enqueue_task(rq, p, 0);
> -		resched_curr(rq);
> -	}
> -
> -	check_class_changed(rq, p, prev_class, old_prio);
> -}
> -
> -void normalize_rt_tasks(void)
> -{
> -	struct task_struct *g, *p;
> -	unsigned long flags;
> -	struct rq *rq;
>  
>  	read_lock(&tasklist_lock);
>  	for_each_process_thread(g, p) {
> @@ -7398,9 +7417,7 @@ void normalize_rt_tasks(void)
>  			continue;
>  		}
>  
> -		rq = task_rq_lock(p, &flags);
> -		normalize_task(rq, p);
> -		task_rq_unlock(rq, p, &flags);
> +		__sched_setscheduler(p, &attr, false, false);
>  	}
>  	read_unlock(&tasklist_lock);
>  }
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ