Message-ID: <20120608144602.GA17251@fieldses.org>
Date: Fri, 8 Jun 2012 10:46:02 -0400
From: "J. Bruce Fields" <bfields@...ldses.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Dave Jones <davej@...hat.com>,
Al Viro <viro@...iv.linux.org.uk>,
Linux Kernel <linux-kernel@...r.kernel.org>,
Miklos Szeredi <mszeredi@...e.cz>, Jan Kara <jack@...e.cz>
Subject: Re: processes hung after sys_renameat, and 'missing' processes

On Fri, Jun 08, 2012 at 09:31:38AM +0200, Peter Zijlstra wrote:
> On Thu, 2012-06-07 at 08:30 -0700, Linus Torvalds wrote:
> > On Thu, Jun 7, 2012 at 3:26 AM, Peter Zijlstra <peterz@...radead.org> wrote:
> > > On Wed, 2012-06-06 at 16:31 -0700, Linus Torvalds wrote:
> > >> PeterZ - the fact that we use mutex_lock_nested with
> > >> I_MUTEX_PARENT/I_MUTEX_CHILD/nothing hopefully doesn't screw us up in
> > >> case we get it wrong, right?
> > >
> > > Sadly, if you get that annotation wrong you can annotate an actual
> > > deadlock away.
What's a (contrived as you want) example where that happens?
> > > This is the reason you have to be very careful when
> > > annotating stuff.
Or alternatively--what do I need to check before I call
mutex_lock_nested?
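
(To check my own understanding, here's my guess at such a contrived
case: two inodes a and b whose i_mutexes fall in the same lock class,
taken in opposite order by two tasks, with the subclass annotations
swapped:

	/* task 1: */
	mutex_lock_nested(&a->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&b->i_mutex, I_MUTEX_CHILD);

	/* task 2: */
	mutex_lock_nested(&b->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&a->i_mutex, I_MUTEX_CHILD);

If lockdep compares only the (class, subclass) pairs, both tasks look
like parent-then-child, so the instance-level ABBA above would never be
reported--is that the idea?)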
--b.
> >
> > Is that true even if you can see an actual deadlock on the lock *instances*?
>
> Yep.. :/
>
> > The point of the locking classes (to me) was always that you could see
> > the deadlock even if it didn't *actually* happen, just *potentially*
> > happened. If the lock classes then mean that actual deadlock
> > information gets hidden, that's a big downside.
>
> Right, so in order to report a deadlock on instances we'd have to do a
> full lock graph walk on every contended lock acquisition. This is quite
> expensive.
>
> Although I can look into doing that from, say, the NMI watchdog and/or
> some sysrq key.
>
> > And yeah, that really means that we absolutely *have* to get the
> > instance data in the lock debug printout, since we can't see deadlocks
> > any other way.
>
> Well, typically deadlocks aren't that hard to find once you trigger
> them. The current case, of course, is the exception to that rule...
>
> > Can you check the compile error that Dave reported for
> > your patch, because right now Dave is running without it, afaik, due
> > to your lock-instance patch not really working in practice.
>
> Oh, missed that in the thread.. yeah here goes.
>
>
> Now also compile tested with CONFIG_LOCK_STAT...
>
> ---
> include/linux/lockdep.h | 44 ++++++++-------
> include/trace/events/lock.h | 3 -
> kernel/lockdep.c | 128 ++++++++++++++++++++++++++++----------------
> 3 files changed, 107 insertions(+), 68 deletions(-)
>
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -211,6 +211,13 @@ struct lock_chain {
> */
> #define MAX_LOCKDEP_KEYS ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
>
> +enum held_lock_state {
> + hls_unheld = 0,
> + hls_acquiring,
> + hls_contended,
> + hls_acquired,
> +};
> +
> struct held_lock {
> /*
> * One-way hash of the dependency chain up to this point. We
> @@ -254,7 +261,10 @@ struct held_lock {
> unsigned int read:2; /* see lock_acquire() comment */
> unsigned int check:2; /* see lock_acquire() comment */
> unsigned int hardirqs_off:1;
> - unsigned int references:11; /* 32 bits */
> + unsigned int state:2; /* see held_lock_state */
> + /* 9 bit hole */
> + unsigned int references:16;
> + /* 16 bit hole */
> };
>
> /*
> @@ -363,6 +373,18 @@ extern void lockdep_trace_alloc(gfp_t ma
>
> #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
>
> +extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> +
> +#define LOCK_CONTENDED(_lock, try, lock) \
> +do { \
> + if (!try(_lock)) { \
> + lock_contended(&(_lock)->dep_map, _RET_IP_); \
> + lock(_lock); \
> + } \
> + lock_acquired(&(_lock)->dep_map, _RET_IP_); \
> +} while (0)
> +
> #else /* !LOCKDEP */
>
> static inline void lockdep_off(void)
> @@ -414,31 +436,13 @@ struct lock_class_key { };
>
> #define lockdep_recursing(tsk) (0)
>
> -#endif /* !LOCKDEP */
> -
> -#ifdef CONFIG_LOCK_STAT
> -
> -extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> -extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> -
> -#define LOCK_CONTENDED(_lock, try, lock) \
> -do { \
> - if (!try(_lock)) { \
> - lock_contended(&(_lock)->dep_map, _RET_IP_); \
> - lock(_lock); \
> - } \
> - lock_acquired(&(_lock)->dep_map, _RET_IP_); \
> -} while (0)
> -
> -#else /* CONFIG_LOCK_STAT */
> -
> #define lock_contended(lockdep_map, ip) do {} while (0)
> #define lock_acquired(lockdep_map, ip) do {} while (0)
>
> #define LOCK_CONTENDED(_lock, try, lock) \
> lock(_lock)
>
> -#endif /* CONFIG_LOCK_STAT */
> +#endif /* !LOCKDEP */
>
> #ifdef CONFIG_LOCKDEP
>
> --- a/include/trace/events/lock.h
> +++ b/include/trace/events/lock.h
> @@ -61,8 +61,6 @@ DEFINE_EVENT(lock, lock_release,
> TP_ARGS(lock, ip)
> );
>
> -#ifdef CONFIG_LOCK_STAT
> -
> DEFINE_EVENT(lock, lock_contended,
>
> TP_PROTO(struct lockdep_map *lock, unsigned long ip),
> @@ -78,7 +76,6 @@ DEFINE_EVENT(lock, lock_acquired,
> );
>
> #endif
> -#endif
>
> #endif /* _TRACE_LOCK_H */
>
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -541,10 +541,23 @@ static void print_lockdep_cache(struct l
> printk("%s", name);
> }
>
> +static void print_lock_state(struct held_lock *hlock)
> +{
> + switch (hlock->state) {
> + /* holding an unheld lock is fail! */
> + case hls_unheld: printk("FAIL: "); break;
> +
> + case hls_acquiring: /* fall-through */
> + case hls_contended: printk("blocked: "); break;
> + case hls_acquired: printk("held: "); break;
> + };
> +}
> +
> static void print_lock(struct held_lock *hlock)
> {
> + print_lock_state(hlock);
> print_lock_name(hlock_class(hlock));
> - printk(", at: ");
> + printk(", instance: %p, at: ", hlock->instance);
> print_ip_sym(hlock->acquire_ip);
> }
>
> @@ -556,7 +569,7 @@ static void lockdep_print_held_locks(str
> printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
> return;
> }
> - printk("%d lock%s held by %s/%d:\n",
> + printk("%d lock%s on stack by %s/%d:\n",
> depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
>
> for (i = 0; i < depth; i++) {
> @@ -3093,6 +3106,7 @@ static int __lock_acquire(struct lockdep
> hlock->check = check;
> hlock->hardirqs_off = !!hardirqs_off;
> hlock->references = references;
> + hlock->state = hls_acquiring;
> #ifdef CONFIG_LOCK_STAT
> hlock->waittime_stamp = 0;
> hlock->holdtime_stamp = lockstat_clock();
> @@ -3607,7 +3621,6 @@ void lockdep_clear_current_reclaim_state
> current->lockdep_reclaim_gfp = 0;
> }
>
> -#ifdef CONFIG_LOCK_STAT
> static int
> print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
> unsigned long ip)
> @@ -3637,14 +3650,73 @@ print_lock_contention_bug(struct task_st
> return 0;
> }
>
> +#ifdef CONFIG_LOCK_STAT
> +
> +static void lockstat_contended(struct held_lock *hlock, unsigned long ip)
> +{
> + struct lockdep_map *lock = hlock->instance;
> + int contention_point, contending_point;
> + struct lock_class_stats *stats;
> +
> + hlock->waittime_stamp = lockstat_clock();
> +
> + contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> + contending_point = lock_point(hlock_class(hlock)->contending_point,
> + lock->ip);
> +
> + stats = get_lock_stats(hlock_class(hlock));
> + if (contention_point < LOCKSTAT_POINTS)
> + stats->contention_point[contention_point]++;
> + if (contending_point < LOCKSTAT_POINTS)
> + stats->contending_point[contending_point]++;
> + if (lock->cpu != smp_processor_id())
> + stats->bounces[bounce_contended + !!hlock->read]++;
> + put_lock_stats(stats);
> +}
> +
> +static void lockstat_acquired(struct held_lock *hlock, unsigned long ip)
> +{
> + struct lockdep_map *lock = hlock->instance;
> + struct lock_class_stats *stats;
> + u64 now, waittime = 0;
> + int cpu;
> +
> + cpu = smp_processor_id();
> + if (hlock->waittime_stamp) {
> + now = lockstat_clock();
> + waittime = now - hlock->waittime_stamp;
> + hlock->holdtime_stamp = now;
> + }
> +
> + stats = get_lock_stats(hlock_class(hlock));
> + if (waittime) {
> + if (hlock->read)
> + lock_time_inc(&stats->read_waittime, waittime);
> + else
> + lock_time_inc(&stats->write_waittime, waittime);
> + }
> + if (lock->cpu != cpu)
> + stats->bounces[bounce_acquired + !!hlock->read]++;
> + put_lock_stats(stats);
> +
> + lock->cpu = cpu;
> + lock->ip = ip;
> +}
> +
> +#else /* CONFIG_LOCK_STAT */
> +
> +static inline void lockstat_contended(struct held_lock *hlock, unsigned long ip) { }
> +static inline void lockstat_acquired(struct held_lock *hlock, unsigned long ip) { }
> +
> +#endif /* CONFIG_LOCK_STAT */
> +
> static void
> __lock_contended(struct lockdep_map *lock, unsigned long ip)
> {
> struct task_struct *curr = current;
> struct held_lock *hlock, *prev_hlock;
> - struct lock_class_stats *stats;
> unsigned int depth;
> - int i, contention_point, contending_point;
> + int i;
>
> depth = curr->lockdep_depth;
> /*
> @@ -3673,20 +3745,8 @@ __lock_contended(struct lockdep_map *loc
> if (hlock->instance != lock)
> return;
>
> - hlock->waittime_stamp = lockstat_clock();
> -
> - contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> - contending_point = lock_point(hlock_class(hlock)->contending_point,
> - lock->ip);
> -
> - stats = get_lock_stats(hlock_class(hlock));
> - if (contention_point < LOCKSTAT_POINTS)
> - stats->contention_point[contention_point]++;
> - if (contending_point < LOCKSTAT_POINTS)
> - stats->contending_point[contending_point]++;
> - if (lock->cpu != smp_processor_id())
> - stats->bounces[bounce_contended + !!hlock->read]++;
> - put_lock_stats(stats);
> + hlock->state = hls_contended;
> + lockstat_contended(hlock, ip);
> }
>
> static void
> @@ -3694,10 +3754,8 @@ __lock_acquired(struct lockdep_map *lock
> {
> struct task_struct *curr = current;
> struct held_lock *hlock, *prev_hlock;
> - struct lock_class_stats *stats;
> unsigned int depth;
> - u64 now, waittime = 0;
> - int i, cpu;
> + int i;
>
> depth = curr->lockdep_depth;
> /*
> @@ -3726,28 +3784,8 @@ __lock_acquired(struct lockdep_map *lock
> if (hlock->instance != lock)
> return;
>
> - cpu = smp_processor_id();
> - if (hlock->waittime_stamp) {
> - now = lockstat_clock();
> - waittime = now - hlock->waittime_stamp;
> - hlock->holdtime_stamp = now;
> - }
> -
> - trace_lock_acquired(lock, ip);
> -
> - stats = get_lock_stats(hlock_class(hlock));
> - if (waittime) {
> - if (hlock->read)
> - lock_time_inc(&stats->read_waittime, waittime);
> - else
> - lock_time_inc(&stats->write_waittime, waittime);
> - }
> - if (lock->cpu != cpu)
> - stats->bounces[bounce_acquired + !!hlock->read]++;
> - put_lock_stats(stats);
> -
> - lock->cpu = cpu;
> - lock->ip = ip;
> + hlock->state = hls_acquired;
> + lockstat_acquired(hlock, ip);
> }
>
> void lock_contended(struct lockdep_map *lock, unsigned long ip)
> @@ -3783,12 +3821,12 @@ void lock_acquired(struct lockdep_map *l
> raw_local_irq_save(flags);
> check_flags(flags);
> current->lockdep_recursion = 1;
> + trace_lock_acquired(lock, ip);
> __lock_acquired(lock, ip);
> current->lockdep_recursion = 0;
> raw_local_irq_restore(flags);
> }
> EXPORT_SYMBOL_GPL(lock_acquired);
> -#endif
>
> /*
> * Used by the testsuite, sanitize the validator state
>