lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 1 May 2019 19:09:53 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc:     linux-rt-users <linux-rt-users@...r.kernel.org>,
        LKML <linux-kernel@...r.kernel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Clark Williams <williams@...hat.com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Oleg Nesterov <oleg@...hat.com>, jack@...e.com,
        Waiman Long <longman@...hat.com>,
        Davidlohr Bueso <dave@...olabs.net>
Subject: Re: [RT WARNING] DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) !=
 current) with fsfreeze (4.19.25-rt16)

On Tue, Apr 30, 2019 at 03:28:11PM +0200, Peter Zijlstra wrote:

> Yeah, but AFAIK fs freezing code has a history of doing exactly that..
> This is just the latest incarnation here.
> 
> So the immediate problem here is that the task doing thaw isn't the same
> that did freeze, right? The thing is, I'm not seeing how that isn't a
> problem with upstream either.
> 
> The freeze code seems to do: percpu_down_write() for the various states,
> and then frobs lockdep state.
> 
> Thaw then does the reverse, frobs lockdep and then does: percpu_up_write().
> 
> percpu_down_write() directly relies on down_write(), and
> percpu_up_write() on up_write(). And note how __up_write() has:
> 
> 	DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
> 
> So why isn't this same code coming unstuck in mainline?

Anyway; I cobbled together the below. Oleg, could you have a look, I'm
sure I messed it up.

---
 fs/super.c                    | 31 ++----------------
 include/linux/fs.h            |  4 +--
 include/linux/percpu-rwsem.h  | 25 ++++++--------
 kernel/locking/percpu-rwsem.c | 76 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 583a0124bc39..bf9c54d05edb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1629,30 +1629,7 @@ EXPORT_SYMBOL(__sb_start_write);
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-	percpu_down_write(sb->s_writers.rw_sem + level-1);
-}
-
-/*
- * We are going to return to userspace and forget about these locks, the
- * ownership goes to the caller of thaw_super() which does unlock().
- */
-static void lockdep_sb_freeze_release(struct super_block *sb)
-{
-	int level;
-
-	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
-		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
-}
-
-/*
- * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
- */
-static void lockdep_sb_freeze_acquire(struct super_block *sb)
-{
-	int level;
-
-	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
-		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+	percpu_down_write_non_owner(sb->s_writers.rw_sem + level-1);
 }
 
 static void sb_freeze_unlock(struct super_block *sb)
@@ -1660,7 +1637,7 @@ static void sb_freeze_unlock(struct super_block *sb)
 	int level;
 
 	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
-		percpu_up_write(sb->s_writers.rw_sem + level);
+		percpu_up_write_non_owner(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1753,7 +1730,6 @@ int freeze_super(struct super_block *sb)
 	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
 	 */
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
-	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
 	return 0;
 }
@@ -1779,14 +1755,11 @@ static int thaw_super_locked(struct super_block *sb)
 		goto out;
 	}
 
-	lockdep_sb_freeze_acquire(sb);
-
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
 		if (error) {
 			printk(KERN_ERR
 				"VFS:Filesystem thaw failed\n");
-			lockdep_sb_freeze_release(sb);
 			up_write(&sb->s_umount);
 			return error;
 		}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..3b61740b90d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1557,9 +1557,9 @@ void __sb_end_write(struct super_block *sb, int level);
 int __sb_start_write(struct super_block *sb, int level, bool wait);
 
 #define __sb_writers_acquired(sb, lev)	\
-	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
 #define __sb_writers_release(sb, lev)	\
-	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
 
 /**
  * sb_end_write - drop write access to a superblock
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 03cb4b6f842e..e0c02d0f82a6 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,15 +5,15 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcuwait.h>
+#include <linux/wait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
-	struct rw_semaphore	rw_sem; /* slowpath */
-	struct rcuwait          writer; /* blocked writer */
+	struct rw_semaphore	rw_sem;
+	wait_queue_head_t	writer;
 	int			readers_block;
 };
 
@@ -23,7 +23,7 @@ static struct percpu_rw_semaphore name = {				\
 	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
 	.read_count = &__percpu_rwsem_rc_##name,			\
 	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
-	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
+	.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),		\
 }
 
 extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
@@ -95,6 +95,9 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
+extern void percpu_down_write_non_owner(struct percpu_rw_semaphore *);
+extern void percpu_up_write_non_owner(struct percpu_rw_semaphore *);
+
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
 				const char *, struct lock_class_key *);
 
@@ -112,23 +115,15 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 	lockdep_assert_held(&(sem)->rw_sem)
 
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
-					bool read, unsigned long ip)
+					unsigned long ip)
 {
 	lock_release(&sem->rw_sem.dep_map, 1, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-	if (!read)
-		sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
-#endif
 }
 
 static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
-					bool read, unsigned long ip)
+					unsigned long ip)
 {
-	lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-	if (!read)
-		sem->rw_sem.owner = current;
-#endif
+	lock_acquire(&sem->rw_sem.dep_map, 0, 1, 1, 1, NULL, ip);
 }
 
 #endif
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f17dad99eec8..a51fd2a9ee90 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,6 +1,7 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
@@ -19,7 +20,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
 	__init_rwsem(&sem->rw_sem, name, rwsem_key);
-	rcuwait_init(&sem->writer);
+	init_waitqueue_head(&sem->writer);
 	sem->readers_block = 0;
 	return 0;
 }
@@ -40,6 +41,40 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
+static void readers_block(struct percpu_rw_semaphore *sem)
+{
+	wait_event_cmd(sem->writer, !sem->readers_block,
+		       __up_read(&sem->rw_sem), __down_read(&sem->rw_sem));
+}
+
+static void block_readers(struct percpu_rw_semaphore *sem)
+{
+	wait_event_exclusive_cmd(sem->writer, !sem->readers_block,
+				 __up_write(&sem->rw_sem),
+				 __down_write(&sem->rw_sem));
+	/*
+	 * Notify new readers to block; up until now, and thus throughout the
+	 * longish rcu_sync_enter() above, new readers could still come in.
+	 */
+	WRITE_ONCE(sem->readers_block, 1);
+}
+
+static void unblock_readers(struct percpu_rw_semaphore *sem)
+{
+	/*
+	 * Signal the writer is done, no fast path yet.
+	 *
+	 * One reason that we cannot just immediately flip to readers_fast is
+	 * that new readers might fail to see the results of this writer's
+	 * critical section.
+	 *
+	 * Therefore we force it through the slow path which guarantees an
+	 * acquire and thereby guarantees the critical section's consistency.
+	 */
+	smp_store_release(&sem->readers_block, 0);
+	wake_up(&sem->writer);
+}
+
 int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 {
 	/*
@@ -85,6 +120,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 	 * Avoid lockdep for the down/up_read() we already have them.
 	 */
 	__down_read(&sem->rw_sem);
+
+	readers_block(sem);
+
 	this_cpu_inc(*sem->read_count);
 	__up_read(&sem->rw_sem);
 
@@ -104,7 +142,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
 	__this_cpu_dec(*sem->read_count);
 
 	/* Prod writer to recheck readers_active */
-	rcuwait_wake_up(&sem->writer);
+	wake_up(&sem->writer);
 }
 EXPORT_SYMBOL_GPL(__percpu_up_read);
 
@@ -146,11 +184,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
 
 	down_write(&sem->rw_sem);
 
-	/*
-	 * Notify new readers to block; up until now, and thus throughout the
-	 * longish rcu_sync_enter() above, new readers could still come in.
-	 */
-	WRITE_ONCE(sem->readers_block, 1);
+	block_readers(sem);
 
 	smp_mb(); /* D matches A */
 
@@ -161,23 +195,13 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
 	 */
 
 	/* Wait for all now active readers to complete. */
-	rcuwait_wait_event(&sem->writer, readers_active_check(sem));
+	wait_event(sem->writer, readers_active_check(sem));
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
-	/*
-	 * Signal the writer is done, no fast path yet.
-	 *
-	 * One reason that we cannot just immediately flip to readers_fast is
-	 * that new readers might fail to see the results of this writer's
-	 * critical section.
-	 *
-	 * Therefore we force it through the slow path which guarantees an
-	 * acquire and thereby guarantees the critical section's consistency.
-	 */
-	smp_store_release(&sem->readers_block, 0);
+	unblock_readers(sem);
 
 	/*
 	 * Release the write lock, this will allow readers back in the game.
@@ -191,4 +215,18 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
 	 */
 	rcu_sync_exit(&sem->rss);
 }
+EXPORT_SYMBOL_GPL(percpu_up_write_non_owner);
+
+void percpu_down_write_non_owner(struct percpu_rw_semaphore *sem)
+{
+	percpu_down_write(sem);
+	up_write(&sem->rw_sem);
+}
+EXPORT_SYMBOL_GPL(percpu_down_write_non_owner);
+
+void percpu_up_write_non_owner(struct percpu_rw_semaphore *sem)
+{
+	down_write(&sem->rw_sem);
+	percpu_up_write(sem);
+}
 EXPORT_SYMBOL_GPL(percpu_up_write);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ