Message-ID: <20170904153725.bxmsqb2fcvda5a2k@hirez.programming.kicks-ass.net>
Date: Mon, 4 Sep 2017 17:37:25 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Mike Galbraith <efault@....de>
Cc: LKML <linux-kernel@...r.kernel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Byungchul Park <max.byungchul.park@...il.com>
Subject: Re: hotplug lockdep splat (tip)
On Mon, Sep 04, 2017 at 03:27:07PM +0200, Mike Galbraith wrote:
> Well, flavor of gripe changed.
>
> [ 164.114290] ======================================================
> [ 164.115146] WARNING: possible circular locking dependency detected
> [ 164.115751] 4.13.0.g90abd70-tip-lockdep #4 Tainted: G E
> [ 164.116348] ------------------------------------------------------
> [ 164.116919] cpuhp/0/12 is trying to acquire lock:
> [ 164.117381] (cpuhp_state){+.+.}, at: [<ffffffff8108e4aa>] cpuhp_thread_fun+0x2a/0x160
> [ 164.118097]
> but now in release context of a crosslock acquired at the following:
> [ 164.118845] ((complete)&per_cpu_ptr(&cpuhp_state, i)->done#2){+.+.}, at: [<ffffffff8108e71f>] cpuhp_issue_call+0x13f/0x170
> [ 164.119789]
> which lock already depends on the new lock.
>
> [ 164.120483]
> the existing dependency chain (in reverse order) is:
> [ 164.121121]
> -> #2 ((complete)&per_cpu_ptr(&cpuhp_state, i)->done#2){+.+.}:
> [ 164.122741] wait_for_completion+0x53/0x1a0
> [ 164.123181] takedown_cpu+0x8a/0xf0
So this is:

  takedown_cpu()
    irq_lock_sparse
    wait_for_completion(st->done);
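That is, takedown_cpu() takes sparse_irq_lock and holds it across the wait
for the AP to die; abbreviated from kernel/cpu.c (from memory, so details
may be off):

  static int takedown_cpu(unsigned int cpu)
  {
          struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
          int err;

          /* no irq alloc/free while the dying CPU rearranges affinities */
          irq_lock_sparse();

          err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
          if (err) {
                  irq_unlock_sparse();
                  return err;
          }

          /* wait for the AP to report itself dead */
          wait_for_completion(&st->done);

          irq_unlock_sparse();
          return 0;
  }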
> -> #1 (sparse_irq_lock){+.+.}:
> [ 164.131664] irq_lock_sparse+0x17/0x20
> [ 164.132039] irq_affinity_online_cpu+0x18/0xd0
> [ 164.132463] cpuhp_invoke_callback+0x1f6/0x830
> [ 164.132928] cpuhp_up_callbacks+0x37/0xb0
> [ 164.133487] cpuhp_thread_fun+0x14f/0x160
This is:

  cpuhp_state
    irq_lock_sparse
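I.e. irq_affinity_online_cpu() runs as one of the bringup callbacks, so with
the cpuhp_state map held by the hotplug thread, and itself takes
sparse_irq_lock; roughly (heavily abbreviated, from memory):

  int irq_affinity_online_cpu(unsigned int cpu)
  {
          unsigned int irq;

          irq_lock_sparse();
          for_each_active_irq(irq) {
                  /* restore the affinity of managed interrupts, elided */
          }
          irq_unlock_sparse();

          return 0;
  }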
> [ 164.148780] complete+0x29/0x60
> [ 164.149064] cpuhp_thread_fun+0xa1/0x160
And this is, I think:

  cpuhp_thread_fun()
    cpuhp_state
      complete(st->done)
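That is, the hotplug thread takes the cpuhp_state map around the callback
invocations and then signals the waiter from that same context; roughly
(abbreviated):

  static void cpuhp_thread_fun(unsigned int cpu)
  {
          struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
          int ret = 0;

          ...
          lock_map_acquire(&cpuhp_state_lock_map);
          /* run the callbacks, e.g. cpuhp_up_callbacks() / cpuhp_down_callbacks() */
          ...
          lock_map_release(&cpuhp_state_lock_map);

          st->result = ret;
          complete(&st->done);
  }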
Which then spells deadlock like:

  CPU0                    CPU1                    CPU2

  cpuhp_state
                          irq_lock_sparse()
                                                  cpuhp_state
                          wait_for_completion()
  irq_lock_sparse()
                                                  complete()
or something, because CPU0 needs to wait for CPU1's irq_lock_sparse
which will wait for CPU2's completion, which in turn waits for CPU0's
cpuhp_state.
Now, this again mixes up and down chains, but now on cpuhp_state.
But I cannot reproduce this... I've let:

  while :; do
          echo 0 > /sys/devices/system/cpu/cpu1/online ;
          echo 1 > /sys/devices/system/cpu/cpu1/online ;
  done

run for a while, but nothing... :/
Does the below make nice?
---
kernel/cpu.c | 47 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 8 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf5308fad51..2ab324d7ff7b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -67,11 +67,15 @@ struct cpuhp_cpu_state {
 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
+static struct lock_class_key cpuhp_state_up_key;
+#ifdef CONFIG_HOTPLUG_CPU
+static struct lock_class_key cpuhp_state_down_key;
+#endif
 static struct lockdep_map cpuhp_state_lock_map =
-        STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_key);
 #endif
 
+
 /**
  * cpuhp_step - Hotplug state machine step
  * @name: Name of the step
@@ -533,6 +537,28 @@ void __init cpuhp_threads_init(void)
         kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
+/*
+ * _cpu_down() and _cpu_up() have different lock ordering wrt st->done, but
+ * because these two functions are globally serialized and st->done is private
+ * to them, we can simply re-init st->done for each of them to separate the
+ * lock chains.
+ *
+ * Must be macro to ensure we have two different call sites.
+ */
+#ifdef CONFIG_LOCKDEP
+#define lockdep_reinit_st_done()                                \
+do {                                                            \
+        int __cpu;                                              \
+        for_each_possible_cpu(__cpu) {                          \
+                struct cpuhp_cpu_state *st =                    \
+                        per_cpu_ptr(&cpuhp_state, __cpu);       \
+                init_completion(&st->done);                     \
+        }                                                       \
+} while(0)
+#else
+#define lockdep_reinit_st_done()
+#endif
+
 #ifdef CONFIG_HOTPLUG_CPU
 /**
  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
@@ -676,12 +702,6 @@ void cpuhp_report_idle_dead(void)
                                  cpuhp_complete_idle_dead, st, 0);
 }
 
-#else
-#define takedown_cpu NULL
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
                            enum cpuhp_state target)
@@ -697,6 +717,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 
         cpus_write_lock();
 
+        lockdep_reinit_st_done();
+        lockdep_init_map(&cpuhp_state_lock_map, "cpuhp_state-down",
+                         &cpuhp_state_down_key, 0);
+
         cpuhp_tasks_frozen = tasks_frozen;
 
         prev_state = st->state;
@@ -759,6 +783,9 @@ int cpu_down(unsigned int cpu)
         return do_cpu_down(cpu, CPUHP_OFFLINE);
 }
 EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
@@ -806,6 +833,10 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 
         cpus_write_lock();
 
+        lockdep_reinit_st_done();
+        lockdep_init_map(&cpuhp_state_lock_map, "cpuhp_state-up",
+                         &cpuhp_state_up_key, 0);
+
         if (!cpu_present(cpu)) {
                 ret = -EINVAL;
                 goto out;
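
For reference, the reason re-initializing st->done actually splits the
chains: with crossrelease, init_completion() keys the completion's lockdep
class off its call site; IIRC it expands to something like:

  #define init_completion(x)                                            \
  do {                                                                  \
          static struct lock_class_key __key;                           \
          lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,   \
                          "(complete)" #x, &__key, 0);                  \
          __init_completion(x);                                         \
  } while (0)

So expanding lockdep_reinit_st_done() in both _cpu_down() and _cpu_up()
gives st->done a separate class per direction, which is also why it has to
be a macro rather than a function.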