lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1208176947.7427.68.camel@twins>
Date:	Mon, 14 Apr 2008 14:42:27 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	ego@...ibm.com
Cc:	Miles Lane <miles.lane@...il.com>,
	LKML <linux-kernel@...r.kernel.org>,
	"Rafael J. Wysocki" <rjw@...k.pl>, Ingo Molnar <mingo@...e.hu>
Subject: Re: 2.6.25-rc9 -- INFO: possible circular locking dependency
	detected

On Mon, 2008-04-14 at 17:57 +0530, Gautham R Shenoy wrote:

> > Ok, so cpu_hotplug has a few issues imho:
> > 
> >  - access to active_writer isn't serialized and thus racey
> >  - holding the lock over the 'write' section generates the stuff above
> > 
> > So basically we want a reader/writer lock, where get/put_online_cpu is
> > the read side and cpu_hotplug_begin/done the write side.
> > 
> > We want:
> >  - readers to recurse in readers (code excluding cpu-hotplug can
> >    call code needing the same).
> > 
> >  - readers to recurse in the writer (the code changing the state can
> >    call code needing a stable state)
> > 
> > rwlock_t isn't quite suitable because it doesn't allow reader in writer
> > recursion and doesn't allow sleeping.
> > 
> > No lockdep annotation _yet_, because current lockdep recursive reader
> > support is:
> >  - broken (have a patch for that)
> >  - doesn't support reader in writer recursion (will have to do something
> >    about that)
> > 
> > Ok, so aside from the obviuos disclaimers, I've only compiled this and
> > might have made things way too complicated - but a slightly feverish
> > brain does that at times. What do people think?
> 
> You beat me to it! 
> 
> I just whipped up a similar patch, with slight differences, and lockdep
> annotations :)

lockdep doesn't quite acecpt reader in writer recursion without a little
patch like so:

(fold of two patches - one fixing the recursion another adding
reader-writer recursion)

cpu_hotplug should use 3.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
Index: linux-2.6-2/kernel/lockdep.c
===================================================================
--- linux-2.6-2.orig/kernel/lockdep.c
+++ linux-2.6-2/kernel/lockdep.c
@@ -1281,6 +1281,13 @@ check_deadlock(struct task_struct *curr,
 		 */
 		if ((read == 2) && prev->read)
 			return 2;
+		/*
+		 * Allow read-after-write recursion of the same
+		 * lock class (i.e. write_lock(lock)+read_lock(lock)):
+		 */
+		if (read == 3)
+			return 2;
+
 		return print_deadlock_bug(curr, prev, next);
 	}
 	return 1;
@@ -1557,12 +1564,11 @@ static int validate_chain(struct task_st
 		if (!ret)
 			return 0;
 		/*
-		 * Mark recursive read, as we jump over it when
-		 * building dependencies (just like we jump over
-		 * trylock entries):
+		 * If we are the first recursive read, don't jump over our
+		 * dependency.
 		 */
-		if (ret == 2)
-			hlock->read = 2;
+		if (hlock->read >= 2 && ret != 2)
+			hlock->read = 1;
 		/*
 		 * Add dependency only if this lock is not the head
 		 * of the chain, and if it's not a secondary read-lock:
Index: linux-2.6-2/lib/locking-selftest.c
===================================================================
--- linux-2.6-2.orig/lib/locking-selftest.c
+++ linux-2.6-2/lib/locking-selftest.c
@@ -1135,12 +1135,12 @@ void locking_selftest(void)
 	debug_locks_silent = !debug_locks_verbose;
 
 	DO_TESTCASE_6R("A-A deadlock", AA);
-	DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
-	DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
-	DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
-	DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
-	DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
-	DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
+	DO_TESTCASE_6("A-B-B-A deadlock", ABBA);
+	DO_TESTCASE_6("A-B-B-C-C-A deadlock", ABBCCA);
+	DO_TESTCASE_6("A-B-C-A-B-C deadlock", ABCABC);
+	DO_TESTCASE_6("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
+	DO_TESTCASE_6("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
+	DO_TESTCASE_6("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
 	DO_TESTCASE_6("double unlock", double_unlock);
 	DO_TESTCASE_6("initialize held", init_held);
 	DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
Index: linux-2.6-2/include/linux/lockdep.h
===================================================================
--- linux-2.6-2.orig/include/linux/lockdep.h
+++ linux-2.6-2/include/linux/lockdep.h
@@ -291,6 +291,7 @@ extern void lockdep_init_map(struct lock
  *   0: exclusive (write) acquire
  *   1: read-acquire (no recursion allowed)
  *   2: read-acquire with same-instance recursion allowed
+ *   3: 2 + reader in writer recursion
  *
  * Values for check:
  *

> comments below
> 
> > 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> > ---
> > diff --git a/kernel/cpu.c b/kernel/cpu.c
> > index 2011ad8..119d837 100644
> > --- a/kernel/cpu.c
> > +++ b/kernel/cpu.c
> > @@ -27,12 +27,13 @@ static int cpu_hotplug_disabled;
> > 
> >  static struct {
> >  	struct task_struct *active_writer;
> > -	struct mutex lock; /* Synchronizes accesses to refcount, */
> > +	spinlock_t lock; /* Synchronizes accesses to refcount, */
> >  	/*
> >  	 * Also blocks the new readers during
> >  	 * an ongoing cpu hotplug operation.
> >  	 */
> >  	int refcount;
> > +	wait_queue_head_t reader_queue;
> >  	wait_queue_head_t writer_queue;
> >  } cpu_hotplug;
> > 
> > @@ -41,8 +42,9 @@ static struct {
> >  void __init cpu_hotplug_init(void)
> >  {
> >  	cpu_hotplug.active_writer = NULL;
> > -	mutex_init(&cpu_hotplug.lock);
> > +	spin_lock_init(&cpu_hotplug.lock);
> >  	cpu_hotplug.refcount = 0;
> > +	init_waitqueue_head(&cpu_hotplug.reader_queue);
> >  	init_waitqueue_head(&cpu_hotplug.writer_queue);
> >  }
> > 
> > @@ -51,27 +53,42 @@ void __init cpu_hotplug_init(void)
> >  void get_online_cpus(void)
> >  {
> >  	might_sleep();
> > +
> > +	spin_lock(&cpu_hotplug.lock);
> >  	if (cpu_hotplug.active_writer == current)
> > -		return;
> We don't need to perform this check holding the spinlock.
> The reason being, this check should succeed only for get_online_cpus()
> invoked from the CPU-hotplug callback path. and by that time,
> the writer thread would have set active_writer to it's task struct
> value.
> 
> For every other thread, when it's trying to check if it is the
> active_writer, the value is either NULL, or the value of the currently
> active writer's task_struct, but never current.
> 
> Am I missing something ?

I guess you're right - inside makes me feel better though :-) And its
not like its a fast path or something like that.

> > -	mutex_lock(&cpu_hotplug.lock);
> > +		goto unlock;
> > +
> > +	if (cpu_hotplug.active_writer) {
> > +		DEFINE_WAIT(wait);
> > +
> > +		for (;;) {
> > +			prepare_to_wait(&cpu_hotplug.reader_queue, &wait,
> > +					TASK_UNINTERRUPTIBLE);
> > +			if (!cpu_hotplug.active_writer)
> > +				break;
> > +			spin_unlock(&cpu_hotplug.lock);
> > +			schedule();
> > +			spin_lock(&cpu_hotplug.lock);
> > +		}
> > +		finish_wait(&cpu_hotplug.reader_queue, &wait);
> > +	}
> >  	cpu_hotplug.refcount++;
> > -	mutex_unlock(&cpu_hotplug.lock);
> > -
> > + unlock:
> > +	spin_unlock(&cpu_hotplug.lock);
> >  }
> >  EXPORT_SYMBOL_GPL(get_online_cpus);
> > 
> >  void put_online_cpus(void)
> >  {
> > +	spin_lock(&cpu_hotplug.lock);
> >  	if (cpu_hotplug.active_writer == current)
> > -		return;
> 
> ditto!
> 
> > -	mutex_lock(&cpu_hotplug.lock);
> > -	cpu_hotplug.refcount--;
> > +		goto unlock;
> > 
> > -	if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
> > +	cpu_hotplug.refcount--;
> > +	if (!cpu_hotplug.refcount)
> >  		wake_up(&cpu_hotplug.writer_queue);
> 	hmm.. when there is no active writer, can't we avoid this
> 	additional wake up ??
> > -
> > -	mutex_unlock(&cpu_hotplug.lock);
> > -
> > + unlock:
> > +	spin_unlock(&cpu_hotplug.lock);
> >  }
> >  EXPORT_SYMBOL_GPL(put_online_cpus);
> > 
> > @@ -95,45 +112,41 @@ void cpu_maps_update_done(void)
> >   * This ensures that the hotplug operation can begin only when the
> >   * refcount goes to zero.
> >   *
> > - * Note that during a cpu-hotplug operation, the new readers, if any,
> > - * will be blocked by the cpu_hotplug.lock
> > - *
> > - * Since cpu_maps_update_begin is always called after invoking
> > - * cpu_maps_update_begin, we can be sure that only one writer is active.
> > - *
> > - * Note that theoretically, there is a possibility of a livelock:
> > - * - Refcount goes to zero, last reader wakes up the sleeping
> > - *   writer.
> > - * - Last reader unlocks the cpu_hotplug.lock.
> > - * - A new reader arrives at this moment, bumps up the refcount.
> > - * - The writer acquires the cpu_hotplug.lock finds the refcount
> > - *   non zero and goes to sleep again.
> > - *
> > - * However, this is very difficult to achieve in practice since
> > - * get_online_cpus() not an api which is called all that often.
> > - *
> > + * cpu_hotplug is basically an unfair recursive reader/writer lock that
> > + * allows reader in writer recursion.
> >   */
> >  static void cpu_hotplug_begin(void)
> >  {
> > -	DECLARE_WAITQUEUE(wait, current);
> > -
> > -	mutex_lock(&cpu_hotplug.lock);
> > +	might_sleep();
> > 
> > -	cpu_hotplug.active_writer = current;
> > -	add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
> > -	while (cpu_hotplug.refcount) {
> > -		set_current_state(TASK_UNINTERRUPTIBLE);
> > -		mutex_unlock(&cpu_hotplug.lock);
> > -		schedule();
> > -		mutex_lock(&cpu_hotplug.lock);
> > +	spin_lock(&cpu_hotplug.lock);
> > +	if (cpu_hotplug.refcount || cpu_hotplug.active_writer) {
> 	if we reach this point, there can be only one writer, i.e.
> 	ourselves!
> 
> 	Because, the other writers are serialized by the
> 	cpu_add_remove_lock in cpu_up()/cpu_down().
> 
> 	Hence we can safely assign cpu_hotplug.active_writer to current.

Ah, missed that. Does it make sense to keep it like this, in case this
outer lock goes away?

> > +		DEFINE_WAIT(wait);
> > +
> > +		for (;;) {
> > +			prepare_to_wait(&cpu_hotplug.writer_queue, &wait,
> > +					TASK_UNINTERRUPTIBLE);
> > +			if (!cpu_hotplug.refcount && !cpu_hotplug.active_writer)
> > +				break;
> > +			spin_unlock(&cpu_hotplug.lock);
> > +			schedule();
> > +			spin_lock(&cpu_hotplug.lock);
> > +		}
> > +		finish_wait(&cpu_hotplug.writer_queue, &wait);
> >  	}
> > -	remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
> > +	cpu_hotplug.active_writer = current;
> 
> 
> > +	spin_unlock(&cpu_hotplug.lock);
> >  }
> > 
> >  static void cpu_hotplug_done(void)
> >  {
> > +	spin_lock(&cpu_hotplug.lock);
> >  	cpu_hotplug.active_writer = NULL;
> > -	mutex_unlock(&cpu_hotplug.lock);
> > +	if (!list_empty(&cpu_hotplug.writer_queue.task_list))
> > +		wake_up(&cpu_hotplug.writer_queue);
> > +	else
> > +		wake_up_all(&cpu_hotplug.reader_queue);
> > +	spin_unlock(&cpu_hotplug.lock);
> >  }
> >  /* Need to know about CPUs going up/down? */
> >  int __cpuinit register_cpu_notifier(struct notifier_block *nb)
> >
> 
> Looks good otherwise!

Thanks..

lockdep annotations:

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
Index: linux-2.6-2/include/linux/lockdep.h
===================================================================
--- linux-2.6-2.orig/include/linux/lockdep.h
+++ linux-2.6-2/include/linux/lockdep.h
@@ -458,4 +458,19 @@ static inline void print_irqtrace_events
 # define rwsem_release(l, n, i)			do { } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+#  define cpu_hotplug_acquire(l, s, t, i)	lock_acquire(l, s, t, 0, 2,
i)
+#  define cpu_hotplug_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 3,
2, i)
+# else
+#  define cpu_hotplug_acquire(l, s, t, i)	lock_acquire(l, s, t, 0, 1,
i)
+#  define cpu_hotplug_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 3,
1, i)
+# endif
+# define cpu_hotplug_release(l, n, i)		lock_release(l, n, i)
+#else
+# define cpu_hotplug_acquire(l, s, t, i)	do { } while (0)
+# define cpu_hotplug_acquire_read(l, s, t, i)	do { } while (0)
+# define cpu_hotplug_release(l, n, i)		do { } while (0)
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
Index: linux-2.6-2/kernel/cpu.c
===================================================================
--- linux-2.6-2.orig/kernel/cpu.c
+++ linux-2.6-2/kernel/cpu.c
@@ -35,12 +35,20 @@ static struct {
 	int refcount;
 	wait_queue_head_t reader_queue;
 	wait_queue_head_t writer_queue;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 } cpu_hotplug;
 
 #define writer_exists() (cpu_hotplug.active_writer != NULL)
 
 void __init cpu_hotplug_init(void)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	static struct lockdep_class_key __key;
+
+	lockdep_init_map(&cpu_hotplug.dep_map, "cpu_hotplug", &__key, 0);
+#endif
 	cpu_hotplug.active_writer = NULL;
 	spin_lock_init(&cpu_hotplug.lock);
 	cpu_hotplug.refcount = 0;
@@ -54,6 +62,8 @@ void get_online_cpus(void)
 {
 	might_sleep();
 
+	cpu_hotplug_acquire_read(&cpu_hotplug.dep_map, 0, 0, _THIS_IP_);
+
 	spin_lock(&cpu_hotplug.lock);
 	if (cpu_hotplug.active_writer == current)
 		goto unlock;
@@ -80,6 +90,8 @@ EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
+	cpu_hotplug_release(&cpu_hotplug.lock, 1, _THIS_IP_);
+
 	spin_lock(&cpu_hotplug.lock);
 	if (cpu_hotplug.active_writer == current)
 		goto unlock;
@@ -119,6 +131,8 @@ static void cpu_hotplug_begin(void)
 {
 	might_sleep();
 
+	cpu_hotplug_acquire(&cpu_hotplug.dep_map, 0, 0, _THIS_IP_);
+
 	spin_lock(&cpu_hotplug.lock);
 	if (cpu_hotplug.refcount || cpu_hotplug.active_writer) {
 		DEFINE_WAIT(wait);
@@ -140,6 +154,8 @@ static void cpu_hotplug_begin(void)
 
 static void cpu_hotplug_done(void)
 {
+	cpu_hotplug_release(&cpu_hotplug.lock, 1, _THIS_IP_);
+
 	spin_lock(&cpu_hotplug.lock);
 	cpu_hotplug.active_writer = NULL;
 	if (!list_empty(&cpu_hotplug.writer_queue.task_list))


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ