The new completion/crossrelease annotations interact unfavourable with the extant flush_work()/flush_workqueue() annotations. The problem is that when a single work class does: wait_for_completion(&C) and complete(&C) in different executions, we'll build dependencies like: lock_map_acquire(W) complete_acquire(C) and lock_map_acquire(W) complete_release(C) which results in the dependency chain: W->C->W, which lockdep thinks spells deadlock, even though there is no deadlock potential since works are ran concurrently. One possibility would be to change the work 'lock' to recursive-read, but that would mean hitting a lockdep limitation on recursive locks. Also, unconditinoally switching to recursive-read here would fail to detect the actual deadlock on single-threaded workqueues, which do have a problem with this. For now, forcefully disregard these locks for crossrelease. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/irqflags.h | 4 +-- include/linux/lockdep.h | 8 +++--- kernel/locking/lockdep.c | 60 +++++++++++++++++++++++++++++------------------ kernel/workqueue.c | 23 +++++++++++++++++- 4 files changed, 66 insertions(+), 29 deletions(-) --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -26,7 +26,7 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD); \ + crossrelease_hist_start(XHLOCK_HARD, 0);\ } while (0) # define trace_hardirq_exit() \ do { \ @@ -36,7 +36,7 @@ do { \ # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT); \ + crossrelease_hist_start(XHLOCK_SOFT, 0);\ } while (0) # define lockdep_softirq_exit() \ do { \ --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -578,11 +578,11 @@ extern void lock_commit_crosslock(struct #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), .cross = 0, } -extern void crossrelease_hist_start(enum xhlock_context_t c); +extern void crossrelease_hist_start(enum xhlock_context_t c, bool force); extern void crossrelease_hist_end(enum xhlock_context_t c); extern void lockdep_init_task(struct task_struct *task); extern void lockdep_free_task(struct task_struct *task); -#else +#else /* !CROSSRELEASE */ #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. @@ -591,11 +591,11 @@ extern void lockdep_free_task(struct tas #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c) {} +static inline void crossrelease_hist_start(enum xhlock_context_t c, bool force) {} static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} -#endif +#endif /* CROSSRELEASE */ #ifdef CONFIG_LOCK_STAT --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4629,7 +4629,7 @@ asmlinkage __visible void lockdep_sys_ex * the index to point to the last entry, which is already invalid. */ crossrelease_hist_end(XHLOCK_PROC); - crossrelease_hist_start(XHLOCK_PROC); + crossrelease_hist_start(XHLOCK_PROC, false); } void lockdep_rcu_suspicious(const char *file, const int line, const char *s) @@ -4725,25 +4725,25 @@ static inline void invalidate_xhlock(str /* * Lock history stacks; we have 3 nested lock history stacks: * - * Hard IRQ - * Soft IRQ - * History / Task - * - * The thing is that once we complete a (Hard/Soft) IRQ the future task locks - * should not depend on any of the locks observed while running the IRQ. - * - * So what we do is rewind the history buffer and erase all our knowledge of - * that temporal event. - */ - -/* - * We need this to annotate lock history boundaries. Take for instance - * workqueues; each work is independent of the last. The completion of a future - * work does not depend on the completion of a past work (in general). - * Therefore we must not carry that (lock) dependency across works. + * HARD(IRQ) + * SOFT(IRQ) + * PROC(ess) + * + * The thing is that once we complete a HARD/SOFT IRQ the future task locks + * should not depend on any of the locks observed while running the IRQ. So + * what we do is rewind the history buffer and erase all our knowledge of that + * temporal event. + * + * The PROCess one is special though; it is used to annotate independence + * inside a task. + * + * Take for instance workqueues; each work is independent of the last. The + * completion of a future work does not depend on the completion of a past work + * (in general). Therefore we must not carry that (lock) dependency across + * works. * * This is true for many things; pretty much all kthreads fall into this - * pattern, where they have an 'idle' state and future completions do not + * pattern, where they have an invariant state and future completions do not * depend on past completions. Its just that since they all have the 'same' * form -- the kthread does the same over and over -- it doesn't typically * matter. @@ -4751,15 +4751,31 @@ static inline void invalidate_xhlock(str * The same is true for system-calls, once a system call is completed (we've * returned to userspace) the next system call does not depend on the lock * history of the previous system call. + * + * They key property for independence, this invariant state, is that it must be + * a point where we hold no locks and have no history. Because if we were to + * hold locks, the restore at _end() would not necessarily recover it's history + * entry. Similarly, independence per-definition means it does not depend on + * prior state. */ -void crossrelease_hist_start(enum xhlock_context_t c) +void crossrelease_hist_start(enum xhlock_context_t c, bool force) { struct task_struct *cur = current; - if (cur->xhlocks) { - cur->xhlock_idx_hist[c] = cur->xhlock_idx; - cur->hist_id_save[c] = cur->hist_id; + if (!cur->xhlocks) + return; + + /* + * We call this at an invariant point, no current state, no history. + */ + if (c == XHLOCK_PROC) { + /* verified the former, ensure the latter */ + WARN_ON_ONCE(!force && cur->lockdep_depth); + invalidate_xhlock(&xhlock(cur->xhlock_idx)); } + + cur->xhlock_idx_hist[c] = cur->xhlock_idx; + cur->hist_id_save[c] = cur->hist_id; } void crossrelease_hist_end(enum xhlock_context_t c) --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2093,7 +2093,28 @@ __acquires(&pool->lock) lock_map_acquire(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); - crossrelease_hist_start(XHLOCK_PROC); + /* + * Strictly speaking we should do start(PROC) without holding any + * locks, that is, before these two lock_map_acquire()'s. + * + * However, that would result in: + * + * A(W1) + * WFC(C) + * A(W1) + * C(C) + * + * Which would create W1->C->W1 dependencies, even though there is no + * actual deadlock possible. There are two solutions, using a + * read-recursive acquire on the work(queue) 'locks', but this will then + * hit the lockdep limitation on recursive locks, or simly discard + * these locks. + * + * AFAICT there is no possible deadlock scenario between the + * flush_work() and complete() primitives (except for single-threaded + * workqueues), so hiding them isn't a problem. + */ + crossrelease_hist_start(XHLOCK_PROC, true); trace_workqueue_execute_start(work); worker->current_func(work); /*