Date:	Wed, 01 Jun 2011 15:58:46 +0200
From:	Arne Jansen <lists@...-jansens.de>
To:	mingo@...hat.com, hpa@...or.com, linux-kernel@...r.kernel.org,
	a.p.zijlstra@...llo.nl, torvalds@...ux-foundation.org,
	efault@....de, npiggin@...nel.dk, akpm@...ux-foundation.org,
	frank.rowand@...sony.com, tglx@...utronix.de, mingo@...e.hu
CC:	linux-tip-commits@...r.kernel.org
Subject: Re: [tip:sched/locking] sched: Add p->pi_lock to task_rq_lock()

Hi,

git bisect blames this commit for a problem I have with v3.0-rc1:
if I printk large amounts of data, the machine locks up.
As the commit does not revert cleanly on top of v3.0-rc1, I haven't
been able to double-check.
The test I use is simple: just add something like

for (i = 0; i < 10000; ++i)
	printk("test %d\n", i);

and trigger it. In most cases I can see the first 10 printks before
I have to power-cycle the machine (sysrq-b no longer works).
Attached my .config.
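
Spelled out as a trivial module, in case that is easier to reproduce
with (a sketch: the report only adds the loop somewhere in-kernel, so
the module boilerplate and names here are arbitrary):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>

static int __init printk_flood_init(void)
{
	int i;

	/* Flood the log; on the affected kernel this locks up the box. */
	for (i = 0; i < 10000; ++i)
		printk("test %d\n", i);

	return 0;
}

static void __exit printk_flood_exit(void)
{
}

module_init(printk_flood_init);
module_exit(printk_flood_exit);
MODULE_LICENSE("GPL");

Loading it with insmod stands in for "trigger it" above.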

-Arne



On 14.04.2011 10:36, tip-bot for Peter Zijlstra wrote:
> Commit-ID:  0122ec5b02f766c355b3168df53a6c038a24fa0d
> Gitweb:     http://git.kernel.org/tip/0122ec5b02f766c355b3168df53a6c038a24fa0d
> Author:     Peter Zijlstra <a.p.zijlstra@...llo.nl>
> AuthorDate: Tue, 5 Apr 2011 17:23:51 +0200
> Committer:  Ingo Molnar <mingo@...e.hu>
> CommitDate: Thu, 14 Apr 2011 08:52:38 +0200
> 
> sched: Add p->pi_lock to task_rq_lock()
> 
> In order to be able to call set_task_cpu() while holding either
> p->pi_lock or task_rq(p)->lock, we need to hold both locks to
> stabilize task_rq().
> 
> This makes task_rq_lock() acquire both locks, and have
> __task_rq_lock() validate that p->pi_lock is held. This increases the
> locking overhead for most scheduler syscalls but allows reduction of
> rq->lock contention for some scheduler hot paths (ttwu).
> 
> Reviewed-by: Frank Rowand <frank.rowand@...sony.com>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> Cc: Mike Galbraith <efault@....de>
> Cc: Nick Piggin <npiggin@...nel.dk>
> Cc: Linus Torvalds <torvalds@...ux-foundation.org>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
> Link: http://lkml.kernel.org/r/20110405152729.232781355@chello.nl
> Signed-off-by: Ingo Molnar <mingo@...e.hu>
> ---
>  kernel/sched.c |  103 +++++++++++++++++++++++++------------------------------
>  1 files changed, 47 insertions(+), 56 deletions(-)
> 
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 6b269b7..f155127 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -599,7 +599,7 @@ static inline int cpu_of(struct rq *rq)
>   * Return the group to which this tasks belongs.
>   *
>   * We use task_subsys_state_check() and extend the RCU verification
> - * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
> + * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
>   * holds that lock for each task it moves into the cgroup. Therefore
>   * by holding that lock, we pin the task to the current cgroup.
>   */
> @@ -609,7 +609,7 @@ static inline struct task_group *task_group(struct task_struct *p)
>  	struct cgroup_subsys_state *css;
>  
>  	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
> -			lockdep_is_held(&task_rq(p)->lock));
> +			lockdep_is_held(&p->pi_lock));
>  	tg = container_of(css, struct task_group, css);
>  
>  	return autogroup_task_group(p, tg);
> @@ -924,23 +924,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
>  #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
>  
>  /*
> - * Check whether the task is waking, we use this to synchronize ->cpus_allowed
> - * against ttwu().
> - */
> -static inline int task_is_waking(struct task_struct *p)
> -{
> -	return unlikely(p->state == TASK_WAKING);
> -}
> -
> -/*
> - * __task_rq_lock - lock the runqueue a given task resides on.
> - * Must be called interrupts disabled.
> + * __task_rq_lock - lock the rq @p resides on.
>   */
>  static inline struct rq *__task_rq_lock(struct task_struct *p)
>  	__acquires(rq->lock)
>  {
>  	struct rq *rq;
>  
> +	lockdep_assert_held(&p->pi_lock);
> +
>  	for (;;) {
>  		rq = task_rq(p);
>  		raw_spin_lock(&rq->lock);
> @@ -951,22 +943,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
>  }
>  
>  /*
> - * task_rq_lock - lock the runqueue a given task resides on and disable
> - * interrupts. Note the ordering: we can safely lookup the task_rq without
> - * explicitly disabling preemption.
> + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
>   */
>  static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
> +	__acquires(p->pi_lock)
>  	__acquires(rq->lock)
>  {
>  	struct rq *rq;
>  
>  	for (;;) {
> -		local_irq_save(*flags);
> +		raw_spin_lock_irqsave(&p->pi_lock, *flags);
>  		rq = task_rq(p);
>  		raw_spin_lock(&rq->lock);
>  		if (likely(rq == task_rq(p)))
>  			return rq;
> -		raw_spin_unlock_irqrestore(&rq->lock, *flags);
> +		raw_spin_unlock(&rq->lock);
> +		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
>  	}
>  }
>  
> @@ -976,10 +968,13 @@ static void __task_rq_unlock(struct rq *rq)
>  	raw_spin_unlock(&rq->lock);
>  }
>  
> -static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
> +static inline void
> +task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
>  	__releases(rq->lock)
> +	__releases(p->pi_lock)
>  {
> -	raw_spin_unlock_irqrestore(&rq->lock, *flags);
> +	raw_spin_unlock(&rq->lock);
> +	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
>  }
>  
>  /*
> @@ -2175,6 +2170,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
>  	 */
>  	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
>  			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
> +
> +#ifdef CONFIG_LOCKDEP
> +	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
> +				      lockdep_is_held(&task_rq(p)->lock)));
> +#endif
>  #endif
>  
>  	trace_sched_migrate_task(p, new_cpu);
> @@ -2270,7 +2270,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
>  		ncsw = 0;
>  		if (!match_state || p->state == match_state)
>  			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
> -		task_rq_unlock(rq, &flags);
> +		task_rq_unlock(rq, p, &flags);
>  
>  		/*
>  		 * If it changed from the expected state, bail out now.
> @@ -2652,6 +2652,7 @@ static void __sched_fork(struct task_struct *p)
>   */
>  void sched_fork(struct task_struct *p, int clone_flags)
>  {
> +	unsigned long flags;
>  	int cpu = get_cpu();
>  
>  	__sched_fork(p);
> @@ -2702,9 +2703,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
>  	 *
>  	 * Silence PROVE_RCU.
>  	 */
> -	rcu_read_lock();
> +	raw_spin_lock_irqsave(&p->pi_lock, flags);
>  	set_task_cpu(p, cpu);
> -	rcu_read_unlock();
> +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
>  
>  #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
>  	if (likely(sched_info_on()))
> @@ -2753,7 +2754,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
>  	set_task_cpu(p, cpu);
>  
>  	p->state = TASK_RUNNING;
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  #endif
>  
>  	rq = task_rq_lock(p, &flags);
> @@ -2765,7 +2766,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
>  	if (p->sched_class->task_woken)
>  		p->sched_class->task_woken(rq, p);
>  #endif
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  	put_cpu();
>  }
>  
> @@ -3490,12 +3491,12 @@ void sched_exec(void)
>  	    likely(cpu_active(dest_cpu)) && need_migrate_task(p)) {
>  		struct migration_arg arg = { p, dest_cpu };
>  
> -		task_rq_unlock(rq, &flags);
> +		task_rq_unlock(rq, p, &flags);
>  		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
>  		return;
>  	}
>  unlock:
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  }
>  
>  #endif
> @@ -3532,7 +3533,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
>  
>  	rq = task_rq_lock(p, &flags);
>  	ns = do_task_delta_exec(p, rq);
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	return ns;
>  }
> @@ -3550,7 +3551,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
>  
>  	rq = task_rq_lock(p, &flags);
>  	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	return ns;
>  }
> @@ -3574,7 +3575,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
>  	rq = task_rq_lock(p, &flags);
>  	thread_group_cputime(p, &totals);
>  	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	return ns;
>  }
> @@ -4693,16 +4694,13 @@ EXPORT_SYMBOL(sleep_on_timeout);
>   */
>  void rt_mutex_setprio(struct task_struct *p, int prio)
>  {
> -	unsigned long flags;
>  	int oldprio, on_rq, running;
>  	struct rq *rq;
>  	const struct sched_class *prev_class;
>  
>  	BUG_ON(prio < 0 || prio > MAX_PRIO);
>  
> -	lockdep_assert_held(&p->pi_lock);
> -
> -	rq = task_rq_lock(p, &flags);
> +	rq = __task_rq_lock(p);
>  
>  	trace_sched_pi_setprio(p, prio);
>  	oldprio = p->prio;
> @@ -4727,7 +4725,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
>  		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
>  
>  	check_class_changed(rq, p, prev_class, oldprio);
> -	task_rq_unlock(rq, &flags);
> +	__task_rq_unlock(rq);
>  }
>  
>  #endif
> @@ -4775,7 +4773,7 @@ void set_user_nice(struct task_struct *p, long nice)
>  			resched_task(rq->curr);
>  	}
>  out_unlock:
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  }
>  EXPORT_SYMBOL(set_user_nice);
>  
> @@ -5003,20 +5001,17 @@ recheck:
>  	/*
>  	 * make sure no PI-waiters arrive (or leave) while we are
>  	 * changing the priority of the task:
> -	 */
> -	raw_spin_lock_irqsave(&p->pi_lock, flags);
> -	/*
> +	 *
>  	 * To be able to change p->policy safely, the appropriate
>  	 * runqueue lock must be held.
>  	 */
> -	rq = __task_rq_lock(p);
> +	rq = task_rq_lock(p, &flags);
>  
>  	/*
>  	 * Changing the policy of the stop threads its a very bad idea
>  	 */
>  	if (p == rq->stop) {
> -		__task_rq_unlock(rq);
> -		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +		task_rq_unlock(rq, p, &flags);
>  		return -EINVAL;
>  	}
>  
> @@ -5040,8 +5035,7 @@ recheck:
>  		if (rt_bandwidth_enabled() && rt_policy(policy) &&
>  				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
>  				!task_group_is_autogroup(task_group(p))) {
> -			__task_rq_unlock(rq);
> -			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +			task_rq_unlock(rq, p, &flags);
>  			return -EPERM;
>  		}
>  	}
> @@ -5050,8 +5044,7 @@ recheck:
>  	/* recheck policy now with rq lock held */
>  	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
>  		policy = oldpolicy = -1;
> -		__task_rq_unlock(rq);
> -		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +		task_rq_unlock(rq, p, &flags);
>  		goto recheck;
>  	}
>  	on_rq = p->on_rq;
> @@ -5073,8 +5066,7 @@ recheck:
>  		activate_task(rq, p, 0);
>  
>  	check_class_changed(rq, p, prev_class, oldprio);
> -	__task_rq_unlock(rq);
> -	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	rt_mutex_adjust_pi(p);
>  
> @@ -5666,7 +5658,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
>  
>  	rq = task_rq_lock(p, &flags);
>  	time_slice = p->sched_class->get_rr_interval(rq, p);
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	rcu_read_unlock();
>  	jiffies_to_timespec(time_slice, &t);
> @@ -5889,8 +5881,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
>  	unsigned int dest_cpu;
>  	int ret = 0;
>  
> -	raw_spin_lock_irqsave(&p->pi_lock, flags);
> -	rq = __task_rq_lock(p);
> +	rq = task_rq_lock(p, &flags);
>  
>  	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
>  		ret = -EINVAL;
> @@ -5918,15 +5909,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
>  	if (need_migrate_task(p)) {
>  		struct migration_arg arg = { p, dest_cpu };
>  		/* Need help from migration thread: drop lock and wait. */
> -		__task_rq_unlock(rq);
> -		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +		task_rq_unlock(rq, p, &flags);
>  		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
>  		tlb_migrate_finish(p->mm);
>  		return 0;
>  	}
>  out:
> -	__task_rq_unlock(rq);
> -	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +	task_rq_unlock(rq, p, &flags);
>  
>  	return ret;
>  }
> @@ -5954,6 +5943,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
>  	rq_src = cpu_rq(src_cpu);
>  	rq_dest = cpu_rq(dest_cpu);
>  
> +	raw_spin_lock(&p->pi_lock);
>  	double_rq_lock(rq_src, rq_dest);
>  	/* Already moved. */
>  	if (task_cpu(p) != src_cpu)
> @@ -5976,6 +5966,7 @@ done:
>  	ret = 1;
>  fail:
>  	double_rq_unlock(rq_src, rq_dest);
> +	raw_spin_unlock(&p->pi_lock);
>  	return ret;
>  }
>  
> @@ -8702,7 +8693,7 @@ void sched_move_task(struct task_struct *tsk)
>  	if (on_rq)
>  		enqueue_task(rq, tsk, 0);
>  
> -	task_rq_unlock(rq, &flags);
> +	task_rq_unlock(rq, tsk, &flags);
>  }
>  #endif /* CONFIG_CGROUP_SCHED */
>  
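
For readers skimming the patch: the core change is the
acquire/validate/retry idiom in task_rq_lock(), which now nests
rq->lock inside p->pi_lock. A rough userspace sketch of the same
idiom (made-up names, pthread mutexes standing in for the kernel's
raw spinlocks):

#include <pthread.h>

struct queue {
	pthread_mutex_t lock;		/* plays the role of rq->lock */
};

struct task {
	pthread_mutex_t pi_lock;	/* plays the role of p->pi_lock */
	struct queue *q;		/* plays the role of task_rq(p) */
};

/* Lock t->pi_lock and the queue t currently resides on. */
static struct queue *task_q_lock(struct task *t)
{
	struct queue *q;

	for (;;) {
		pthread_mutex_lock(&t->pi_lock);	/* outer lock */
		q = t->q;				/* may be stale... */
		pthread_mutex_lock(&q->lock);		/* inner lock */
		if (q == t->q)				/* ...validate under both */
			return q;			/* stable: both locks held */
		pthread_mutex_unlock(&q->lock);		/* raced with a move: retry */
		pthread_mutex_unlock(&t->pi_lock);
	}
}

static void task_q_unlock(struct task *t, struct queue *q)
{
	pthread_mutex_unlock(&q->lock);		/* release in reverse order */
	pthread_mutex_unlock(&t->pi_lock);
}

Anyone moving a task between queues must hold at least one of the two
locks (that is the rule the commit message states for set_task_cpu()),
which is what makes the validation above sufficient.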


[Attachment: "config", text/plain, 79610 bytes]
