lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20161217161911.4753-1-sergey.senozhatsky@gmail.com>
Date:   Sun, 18 Dec 2016 01:19:11 +0900
From:   Sergey Senozhatsky <sergey.senozhatsky@...il.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Ingo Molnar <mingo@...hat.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        linux-kernel@...r.kernel.org,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
Subject: [RFC][PATCH] spinlock_debug: report spinlock lockup from unlock

There is a race window between the point when __spin_lock_debug()
detects spinlock lockup and the time when CPU that caused the
lockup receives its backtrace interrupt.

Before __spin_lock_debug() triggers all_cpu_backtrace() it calls
spin_dump() to printk() the current state of the lock and CPU
backtrace. These printk() calls can take some time to print the
messages to serial console, for instance (we are not talking
about console_unlock() loop and a flood of messages from other
CPUs, but just spin_dump() printk() and serial console).

All those preparation steps can give CPU that caused the lockup
enough time to run away, so when it receives a backtrace interrupt
it can look completely innocent.

The patch extends `struct raw_spinlock' with additional variable
that stores jiffies of successful do_raw_spin_lock() and checks
in debug_spin_unlock() whether the spin_lock has been locked for
too long. So we will have a reliable backtrace from CPU that
locked up and a reliable backtrace from CPU that caused the
lockup.

Missed spin_lock unlock deadline report (example):

BUG: spinlock missed unlock deadline on CPU#0, bash/327
 lock: lock.25562+0x0/0x60, .magic: dead4ead, .owner: bash/327, .owner_cpu: 0
CPU: 0 PID: 327 Comm: bash
Call Trace:
 dump_stack+0x4f/0x65
 spin_dump+0x8a/0x8f
 spin_bug+0x2b/0x2d
 do_raw_spin_unlock+0x92/0xa3
 _raw_spin_unlock+0x27/0x44
 ...

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@...il.com>
---
 include/linux/spinlock_types.h  | 4 +++-
 kernel/locking/spinlock_debug.c | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb13a5d..8972e56eeefb 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -25,6 +25,7 @@ typedef struct raw_spinlock {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned int magic, owner_cpu;
 	void *owner;
+	unsigned long acquire_tstamp;
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -45,7 +46,8 @@ typedef struct raw_spinlock {
 # define SPIN_DEBUG_INIT(lockname)		\
 	.magic = SPINLOCK_MAGIC,		\
 	.owner_cpu = -1,			\
-	.owner = SPINLOCK_OWNER_INIT,
+	.owner = SPINLOCK_OWNER_INIT,		\
+	.acquire_tstamp = 0,
 #else
 # define SPIN_DEBUG_INIT(lockname)
 #endif
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a596cffa..daeab4bc86ff 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -12,6 +12,7 @@
 #include <linux/debug_locks.h>
 #include <linux/delay.h>
 #include <linux/export.h>
+#include <linux/jiffies.h>
 
 void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
 			  struct lock_class_key *key)
@@ -27,6 +28,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
+	lock->acquire_tstamp = 0;
 }
 
 EXPORT_SYMBOL(__raw_spin_lock_init);
@@ -90,6 +92,7 @@ static inline void debug_spin_lock_after(raw_spinlock_t *lock)
 {
 	lock->owner_cpu = raw_smp_processor_id();
 	lock->owner = current;
+	lock->acquire_tstamp = jiffies;
 }
 
 static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,6 +102,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
 	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
 	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
+	SPIN_BUG_ON(time_after_eq(jiffies, lock->acquire_tstamp + HZ),
+			lock, "missed unlock deadline");
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
 }
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ