linux-kernel - Re: processes hung after sys_renameat, and 'missing' processes

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120608144602.GA17251@fieldses.org>
Date:	Fri, 8 Jun 2012 10:46:02 -0400
From:	"J. Bruce Fields" <bfields@...ldses.org>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Dave Jones <davej@...hat.com>,
	Al Viro <viro@...iv.linux.org.uk>,
	Linux Kernel <linux-kernel@...r.kernel.org>,
	Miklos Szeredi <mszeredi@...e.cz>, Jan Kara <jack@...e.cz>
Subject: Re: processes hung after sys_renameat, and 'missing' processes

On Fri, Jun 08, 2012 at 09:31:38AM +0200, Peter Zijlstra wrote:
> On Thu, 2012-06-07 at 08:30 -0700, Linus Torvalds wrote:
> > On Thu, Jun 7, 2012 at 3:26 AM, Peter Zijlstra <peterz@...radead.org> wrote:
> > > On Wed, 2012-06-06 at 16:31 -0700, Linus Torvalds wrote:
> > >> PeterZ - the fact that we use mutex_lock_nested with
> > >> I_MUTEX_PARENT/I_MUTEX_CHILD/nothing hopefully doesn't screw us up in
> > >> case we get it wrong, right?
> > >
> > > Sadly, if you get that annotation wrong you can annotate an actual
> > > deadlock away.

What's a (contrived as you want) example where that happens?

> > > This the reason you have to be very careful when
> > > annotating stuff.

Or alternatively--what do I need to check before I call
mutex_lock_nested?

--b.

> > 
> > Is that true even if you can see an actual deadlock on the lock *instances*?
> 
> Yep.. :/
> 
> > The point of the locking classes (to me) was always that you could see
> > the deadlock even if it didn't *actually* happen, just *potentially*
> > happened. If the lock classes then mean that that hides an actual
> > deadlock information, that's a big downside.
> 
> Right, so in order to report a deadlock on instances we'd have to do a
> full lock graph walk on every contended lock acquisition. This is quite
> expensive.
> 
> Although I can look into doing that from say the NMI watchdog and/or
> some sysrq key.
> 
> > And yeah, that really means that we absolutely *have* to get the
> > instance data in the lock debug printout, since we can't see deadlocks
> > any other way. 
> 
> Well, typically deadlocks aren't that hard to find once you trigger
> them. The current case of course being the exception to that rule.. 
> 
> > Can you check the compile error that Dave reported for
> > your patch, because right now Dave is running without it, afaik, due
> > to your lock-instance patch not really working in practice.
> 
> Oh, missed that in the thread.. yeah here goes.
> 
> 
> Now also compile tested with CONFIG_LOCK_STAT...
> 
> ---
>  include/linux/lockdep.h     |   44 ++++++++-------
>  include/trace/events/lock.h |    3 -
>  kernel/lockdep.c            |  128 ++++++++++++++++++++++++++++----------------
>  3 files changed, 107 insertions(+), 68 deletions(-)
> 
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -211,6 +211,13 @@ struct lock_chain {
>   */
>  #define MAX_LOCKDEP_KEYS		((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
>  
> +enum held_lock_state {
> +	hls_unheld = 0,
> +	hls_acquiring,
> +	hls_contended,
> +	hls_acquired,
> +};
> +
>  struct held_lock {
>  	/*
>  	 * One-way hash of the dependency chain up to this point. We
> @@ -254,7 +261,10 @@ struct held_lock {
>  	unsigned int read:2;        /* see lock_acquire() comment */
>  	unsigned int check:2;       /* see lock_acquire() comment */
>  	unsigned int hardirqs_off:1;
> -	unsigned int references:11;					/* 32 bits */
> +	unsigned int state:2;	    /* see held_lock_state */
> +									/* 9 bit hole */
> +	unsigned int references:16;
> +									/* 16 bit hole */
>  };
>  
>  /*
> @@ -363,6 +373,18 @@ extern void lockdep_trace_alloc(gfp_t ma
>  
>  #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
>  
> +extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> +
> +#define LOCK_CONTENDED(_lock, try, lock)			\
> +do {								\
> +	if (!try(_lock)) {					\
> +		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
> +		lock(_lock);					\
> +	}							\
> +	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
> +} while (0)
> +
>  #else /* !LOCKDEP */
>  
>  static inline void lockdep_off(void)
> @@ -414,31 +436,13 @@ struct lock_class_key { };
>  
>  #define lockdep_recursing(tsk)			(0)
>  
> -#endif /* !LOCKDEP */
> -
> -#ifdef CONFIG_LOCK_STAT
> -
> -extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> -extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> -
> -#define LOCK_CONTENDED(_lock, try, lock)			\
> -do {								\
> -	if (!try(_lock)) {					\
> -		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
> -		lock(_lock);					\
> -	}							\
> -	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
> -} while (0)
> -
> -#else /* CONFIG_LOCK_STAT */
> -
>  #define lock_contended(lockdep_map, ip) do {} while (0)
>  #define lock_acquired(lockdep_map, ip) do {} while (0)
>  
>  #define LOCK_CONTENDED(_lock, try, lock) \
>  	lock(_lock)
>  
> -#endif /* CONFIG_LOCK_STAT */
> +#endif /* !LOCKDEP */
>  
>  #ifdef CONFIG_LOCKDEP
>  
> --- a/include/trace/events/lock.h
> +++ b/include/trace/events/lock.h
> @@ -61,8 +61,6 @@ DEFINE_EVENT(lock, lock_release,
>  	TP_ARGS(lock, ip)
>  );
>  
> -#ifdef CONFIG_LOCK_STAT
> -
>  DEFINE_EVENT(lock, lock_contended,
>  
>  	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
> @@ -78,7 +76,6 @@ DEFINE_EVENT(lock, lock_acquired,
>  );
>  
>  #endif
> -#endif
>  
>  #endif /* _TRACE_LOCK_H */
>  
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -541,10 +541,23 @@ static void print_lockdep_cache(struct l
>  	printk("%s", name);
>  }
>  
> +static void print_lock_state(struct held_lock *hlock)
> +{
> +	switch (hlock->state) {
> +	/* holding an unheld lock is fail!  */
> +	case hls_unheld:	printk("FAIL:    "); break;
> +
> +	case hls_acquiring:	/* fall-through */
> +	case hls_contended:	printk("blocked: "); break;
> +	case hls_acquired:	printk("held:    "); break;
> +	};
> +}
> +
>  static void print_lock(struct held_lock *hlock)
>  {
> +	print_lock_state(hlock);
>  	print_lock_name(hlock_class(hlock));
> -	printk(", at: ");
> +	printk(", instance: %p, at: ", hlock->instance);
>  	print_ip_sym(hlock->acquire_ip);
>  }
>  
> @@ -556,7 +569,7 @@ static void lockdep_print_held_locks(str
>  		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
>  		return;
>  	}
> -	printk("%d lock%s held by %s/%d:\n",
> +	printk("%d lock%s on stack by %s/%d:\n",
>  		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
>  
>  	for (i = 0; i < depth; i++) {
> @@ -3093,6 +3106,7 @@ static int __lock_acquire(struct lockdep
>  	hlock->check = check;
>  	hlock->hardirqs_off = !!hardirqs_off;
>  	hlock->references = references;
> +	hlock->state = hls_acquiring;
>  #ifdef CONFIG_LOCK_STAT
>  	hlock->waittime_stamp = 0;
>  	hlock->holdtime_stamp = lockstat_clock();
> @@ -3607,7 +3621,6 @@ void lockdep_clear_current_reclaim_state
>  	current->lockdep_reclaim_gfp = 0;
>  }
>  
> -#ifdef CONFIG_LOCK_STAT
>  static int
>  print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
>  			   unsigned long ip)
> @@ -3637,14 +3650,73 @@ print_lock_contention_bug(struct task_st
>  	return 0;
>  }
>  
> +#ifdef CONFIG_LOCK_STAT
> +
> +static void lockstat_contended(struct held_lock *hlock, unsigned long ip)
> +{
> +	struct lockdep_map *lock = hlock->instance;
> +	int contention_point, contending_point;
> +	struct lock_class_stats *stats;
> +
> +	hlock->waittime_stamp = lockstat_clock();
> +
> +	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> +	contending_point = lock_point(hlock_class(hlock)->contending_point,
> +				      lock->ip);
> +
> +	stats = get_lock_stats(hlock_class(hlock));
> +	if (contention_point < LOCKSTAT_POINTS)
> +		stats->contention_point[contention_point]++;
> +	if (contending_point < LOCKSTAT_POINTS)
> +		stats->contending_point[contending_point]++;
> +	if (lock->cpu != smp_processor_id())
> +		stats->bounces[bounce_contended + !!hlock->read]++;
> +	put_lock_stats(stats);
> +}
> +
> +static void lockstat_acquired(struct held_lock *hlock, unsigned long ip)
> +{
> +	struct lockdep_map *lock = hlock->instance;
> +	struct lock_class_stats *stats;
> +	u64 now, waittime = 0;
> +	int cpu;
> +
> +	cpu = smp_processor_id();
> +	if (hlock->waittime_stamp) {
> +		now = lockstat_clock();
> +		waittime = now - hlock->waittime_stamp;
> +		hlock->holdtime_stamp = now;
> +	}
> +
> +	stats = get_lock_stats(hlock_class(hlock));
> +	if (waittime) {
> +		if (hlock->read)
> +			lock_time_inc(&stats->read_waittime, waittime);
> +		else
> +			lock_time_inc(&stats->write_waittime, waittime);
> +	}
> +	if (lock->cpu != cpu)
> +		stats->bounces[bounce_acquired + !!hlock->read]++;
> +	put_lock_stats(stats);
> +
> +	lock->cpu = cpu;
> +	lock->ip = ip;
> +}
> +
> +#else /* CONFIG_LOCK_STAT */
> +
> +static inline void lockstat_contended(struct held_lock *hlock, unsigned long ip) { }
> +static inline void lockstat_acquired(struct held_lock *hlock, unsigned long ip) { }
> +
> +#endif /* CONFIG_LOCK_STAT */
> +
>  static void
>  __lock_contended(struct lockdep_map *lock, unsigned long ip)
>  {
>  	struct task_struct *curr = current;
>  	struct held_lock *hlock, *prev_hlock;
> -	struct lock_class_stats *stats;
>  	unsigned int depth;
> -	int i, contention_point, contending_point;
> +	int i;
>  
>  	depth = curr->lockdep_depth;
>  	/*
> @@ -3673,20 +3745,8 @@ __lock_contended(struct lockdep_map *loc
>  	if (hlock->instance != lock)
>  		return;
>  
> -	hlock->waittime_stamp = lockstat_clock();
> -
> -	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> -	contending_point = lock_point(hlock_class(hlock)->contending_point,
> -				      lock->ip);
> -
> -	stats = get_lock_stats(hlock_class(hlock));
> -	if (contention_point < LOCKSTAT_POINTS)
> -		stats->contention_point[contention_point]++;
> -	if (contending_point < LOCKSTAT_POINTS)
> -		stats->contending_point[contending_point]++;
> -	if (lock->cpu != smp_processor_id())
> -		stats->bounces[bounce_contended + !!hlock->read]++;
> -	put_lock_stats(stats);
> +	hlock->state = hls_contended;
> +	lockstat_contended(hlock, ip);
>  }
>  
>  static void
> @@ -3694,10 +3754,8 @@ __lock_acquired(struct lockdep_map *lock
>  {
>  	struct task_struct *curr = current;
>  	struct held_lock *hlock, *prev_hlock;
> -	struct lock_class_stats *stats;
>  	unsigned int depth;
> -	u64 now, waittime = 0;
> -	int i, cpu;
> +	int i;
>  
>  	depth = curr->lockdep_depth;
>  	/*
> @@ -3726,28 +3784,8 @@ __lock_acquired(struct lockdep_map *lock
>  	if (hlock->instance != lock)
>  		return;
>  
> -	cpu = smp_processor_id();
> -	if (hlock->waittime_stamp) {
> -		now = lockstat_clock();
> -		waittime = now - hlock->waittime_stamp;
> -		hlock->holdtime_stamp = now;
> -	}
> -
> -	trace_lock_acquired(lock, ip);
> -
> -	stats = get_lock_stats(hlock_class(hlock));
> -	if (waittime) {
> -		if (hlock->read)
> -			lock_time_inc(&stats->read_waittime, waittime);
> -		else
> -			lock_time_inc(&stats->write_waittime, waittime);
> -	}
> -	if (lock->cpu != cpu)
> -		stats->bounces[bounce_acquired + !!hlock->read]++;
> -	put_lock_stats(stats);
> -
> -	lock->cpu = cpu;
> -	lock->ip = ip;
> +	hlock->state = hls_acquired;
> +	lockstat_acquired(hlock, ip);
>  }
>  
>  void lock_contended(struct lockdep_map *lock, unsigned long ip)
> @@ -3783,12 +3821,12 @@ void lock_acquired(struct lockdep_map *l
>  	raw_local_irq_save(flags);
>  	check_flags(flags);
>  	current->lockdep_recursion = 1;
> +	trace_lock_acquired(lock, ip);
>  	__lock_acquired(lock, ip);
>  	current->lockdep_recursion = 0;
>  	raw_local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(lock_acquired);
> -#endif
>  
>  /*
>   * Used by the testsuite, sanitize the validator state
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/