linux-kernel - [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1455539803-13913-1-git-send-email-joonas.lahtinen@linux.intel.com>
Date:	Mon, 15 Feb 2016 14:36:43 +0200
From:	Joonas Lahtinen <joonas.lahtinen@...ux.intel.com>
To:	Intel graphics driver community testing & development 
	<intel-gfx@...ts.freedesktop.org>
Cc:	Joonas Lahtinen <joonas.lahtinen@...ux.intel.com>,
	Linux kernel development <linux-kernel@...r.kernel.org>,
	Ingo Molnar <mingo@...nel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	David Hildenbrand <dahi@...ux.vnet.ibm.com>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	"Gautham R. Shenoy" <ego@...ux.vnet.ibm.com>,
	Chris Wilson <chris@...is-wilson.co.uk>
Subject: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting

Instead of implementing a custom locked reference counting, use lockref.

Current implementation leads to a deadlock splat on Intel SKL platforms
when lockdep debugging is enabled.

This is due to few of CPUfreq drivers (including Intel P-state) having this;
policy->rwsem is locked during driver initialization and the functions called
during init that actually apply CPU limits use get_online_cpus (because they
have other calling paths too), which will briefly lock cpu_hotplug.lock to
increase cpu_hotplug.refcount.

On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
which will lock policy->rwsem and cpu_hotplug.lock is already held by
cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
our CI system (though it is a very unlikely one). See the Bugzilla link for more
details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294
Cc: Linux kernel development <linux-kernel@...r.kernel.org>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: David Hildenbrand <dahi@...ux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Gautham R. Shenoy <ego@...ux.vnet.ibm.com>
Cc: Chris Wilson <chris@...is-wilson.co.uk>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@...ux.intel.com>
---
 kernel/cpu.c | 87 +++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d396..8acec83 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <linux/lockref.h>
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
@@ -62,13 +63,10 @@ static struct {
 	struct task_struct *active_writer;
 	/* wait queue to wake up the active_writer */
 	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
+	/* wait queue to wake up readers */
+	wait_queue_head_t reader_wq;
+	/* track online CPU users */
+	struct lockref lockref;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -76,7 +74,7 @@ static struct {
 } cpu_hotplug = {
 	.active_writer = NULL,
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+	.reader_wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.reader_wq),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	.dep_map = {.name = "cpu_hotplug.lock" },
 #endif
@@ -92,52 +90,64 @@ static struct {
 
 void get_online_cpus(void)
 {
-	might_sleep();
+	DEFINE_WAIT(wait);
+
 	if (cpu_hotplug.active_writer == current)
 		return;
+
 	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	/* First to get might have to wait over a hotplug operation. */
+	while (!lockref_get_or_lock(&cpu_hotplug.lockref)) {
+		if (cpu_hotplug.lockref.count == 0) {
+			cpu_hotplug.lockref.count++;
+			spin_unlock(&cpu_hotplug.lockref.lock);
+			break;
+		}
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
+		prepare_to_wait(&cpu_hotplug.reader_wq, &wait, TASK_UNINTERRUPTIBLE);
+		schedule();
+		finish_wait(&cpu_hotplug.reader_wq, &wait);
+	}
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
 	if (cpu_hotplug.active_writer == current)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
+	/* Last to release might have to wake queued hotplug operation. */
+	if (!lockref_put_or_lock(&cpu_hotplug.lockref)) {
+		WARN_ON(cpu_hotplug.lockref.count <= 0);
+		cpu_hotplug.lockref.count = 0;
+		spin_unlock(&cpu_hotplug.lockref.lock);
 
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
+		if (waitqueue_active(&cpu_hotplug.wq))
+			wake_up(&cpu_hotplug.wq);
+	}
 
 	cpuhp_lock_release();
-
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
  * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
+ * cpu_hotplug.lockref goes to zero.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
+ * will be blocked by the cpu_hotplug.lockref being dead.
  *
  * Since cpu_hotplug_begin() is always called after invoking
  * cpu_maps_update_begin(), we can be sure that only one writer is active.
  *
  * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
+ * - Lockref goes to zero, last reader wakes up the sleeping
  *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
+ * - A new reader arrives at this moment, bumps up the lockref.
+ * - The woken up writer finds the lockref non-zero and goes
+ *   to sleep again.
  *
  * However, this is very difficult to achieve in practice since
  * get_online_cpus() not an api which is called all that often.
@@ -151,20 +161,31 @@ void cpu_hotplug_begin(void)
 	cpuhp_lock_acquire();
 
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
+		spin_lock(&cpu_hotplug.lockref.lock);
+		if (cpu_hotplug.lockref.count <= 0)
+			break;
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
 		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
+		finish_wait(&cpu_hotplug.wq, &wait);
 	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+
+	WARN_ON(__lockref_is_dead(&cpu_hotplug.lockref));
+	lockref_mark_dead(&cpu_hotplug.lockref);
+	spin_unlock(&cpu_hotplug.lockref.lock);
 }
 
 void cpu_hotplug_done(void)
 {
+	WARN_ON(!__lockref_is_dead(&cpu_hotplug.lockref));
+	cpu_hotplug.lockref.count = 0;
+	spin_unlock(&cpu_hotplug.lockref.lock);
+
+	if (waitqueue_active(&cpu_hotplug.reader_wq))
+		wake_up(&cpu_hotplug.reader_wq);
+
 	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
 	cpuhp_lock_release();
 }
 
-- 
2.5.0