lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150713212559.GA13886@redhat.com>
Date:	Mon, 13 Jul 2015 23:25:59 +0200
From:	Oleg Nesterov <oleg@...hat.com>
To:	Al Viro <viro@...iv.linux.org.uk>, Jan Kara <jack@...e.cz>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Paul McKenney <paulmck@...ux.vnet.ibm.com>,
	Peter Zijlstra <peterz@...radead.org>
Cc:	Daniel Wagner <daniel.wagner@...-carit.de>,
	Davidlohr Bueso <dave@...olabs.net>,
	Ingo Molnar <mingo@...hat.com>, Tejun Heo <tj@...nel.org>,
	linux-kernel@...r.kernel.org
Subject: [PATCH 4/4] change sb_writers to use percpu_rw_semaphore

__sb_start/end_write() can use percpu_down/up_read().
---
 fs/super.c         |  135 +++++++++++++++++-----------------------------------
 include/linux/fs.h |   14 +----
 2 files changed, 47 insertions(+), 102 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 94303fc..6e336b8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -147,7 +147,7 @@ static void destroy_super(struct super_block *s)
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
 	for (i = 0; i < SB_FREEZE_LEVELS; i++)
-		percpu_counter_destroy(&s->s_writers.counter[i]);
+		percpu_free_rwsem(s->s_writers.rw_sem + i);
 	security_sb_free(s);
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
@@ -178,14 +178,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 		goto fail;
 
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-		if (percpu_counter_init(&s->s_writers.counter[i], 0,
-					GFP_KERNEL) < 0)
-			goto fail;
-		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-				 &type->s_writers_key[i], 0);
+		__percpu_init_rwsem(&s->s_writers.rw_sem[i],
+					sb_writers_name[i],
+					&type->s_writers_key[i]);
 	}
-	init_waitqueue_head(&s->s_writers.wait);
-	init_waitqueue_head(&s->s_writers.wait_unfrozen);
 	s->s_bdi = &noop_backing_dev_info;
 	s->s_flags = flags;
 	INIT_HLIST_NODE(&s->s_instances);
@@ -1146,43 +1142,10 @@ out:
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
-	percpu_counter_dec(&sb->s_writers.counter[level-1]);
-	/*
-	 * Make sure s_writers are updated before we wake up waiters in
-	 * freeze_super().
-	 */
-	smp_mb();
-	if (waitqueue_active(&sb->s_writers.wait))
-		wake_up(&sb->s_writers.wait);
-	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+	percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's it bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
-				unsigned long ip)
-{
-	int i;
-
-	if (!trylock) {
-		for (i = 0; i < level - 1; i++)
-			if (lock_is_held(&sb->s_writers.lock_map[i])) {
-				trylock = true;
-				break;
-			}
-	}
-	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
 
 /*
  * This is an internal function, please use sb_start_{write,pagefault,intwrite}
@@ -1190,27 +1153,15 @@ static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
  */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
-retry:
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		if (!wait)
-			return 0;
-		wait_event(sb->s_writers.wait_unfrozen,
-			   sb->s_writers.frozen < level);
-	}
-
-#ifdef CONFIG_LOCKDEP
-	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
-	percpu_counter_inc(&sb->s_writers.counter[level-1]);
 	/*
-	 * Make sure counter is updated before we check for frozen.
-	 * freeze_super() first sets frozen and then checks the counter.
+	 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	 * !!!!!!!!! percpu_down_read_trylock() wasn't merged yet !!!!!!!!!!!!!
+	 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 	 */
-	smp_mb();
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		__sb_end_write(sb, level);
-		goto retry;
-	}
+	if (!wait)
+		return 0;
+
+	percpu_down_read(sb->s_writers.rw_sem + level-1);
 	return 1;
 }
 EXPORT_SYMBOL(__sb_start_write);
@@ -1227,42 +1178,46 @@ EXPORT_SYMBOL(__sb_start_write);
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-	s64 writers;
-
-	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-
-	do {
-		DEFINE_WAIT(wait);
-
-		/*
-		 * We use a barrier in prepare_to_wait() to separate setting
-		 * of frozen and checking of the counter
-		 */
-		prepare_to_wait(&sb->s_writers.wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-		if (writers)
-			schedule();
-
-		finish_wait(&sb->s_writers.wait, &wait);
-	} while (writers);
+	percpu_down_write(sb->s_writers.rw_sem + level-1);
 }
 
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * !!!!!!!!!!! Move this code into kernel/locking/percpu-rwsem.c !!!!!!!!!!!!!!
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+#include "../kernel/locking/rwsem.h"
 /* Avoid the warning from lockdep_sys_exit() */
 static void sb_lockdep_release(struct super_block *sb)
 {
 	int level;
+	struct percpu_rw_semaphore *sem;
+
+	for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
+		sem = sb->s_writers.rw_sem + level;
+		rwsem_clear_owner(&sem->rw_sem);
+		rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+	}
+}
+
+static void sb_lockdep_acquire(struct super_block *sb)
+{
+	int level;
+	struct percpu_rw_semaphore *sem;
 
 	for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
-		rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+		sem = sb->s_writers.rw_sem + level;
+		rwsem_set_owner(&sem->rw_sem); /* unneeded */
+		rwsem_acquire(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
 	}
 }
 
 static void sb_unlock_frozen(struct super_block *sb)
 {
-	smp_wmb();
-	wake_up(&sb->s_writers.wait_unfrozen);
+	int level;
+
+	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+		percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1323,8 +1278,6 @@ int freeze_super(struct super_block *sb)
 
 	/* From now on, no new normal writers can start */
 	sb->s_writers.frozen = SB_FREEZE_WRITE;
-	smp_wmb();
-
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	up_write(&sb->s_umount);
 
@@ -1332,9 +1285,8 @@ int freeze_super(struct super_block *sb)
 
 	/* Now we go and block page faults... */
 	down_write(&sb->s_umount);
-	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-	smp_wmb();
 
+	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
 	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
 	/* All writers are done so after syncing there won't be dirty data */
@@ -1342,7 +1294,6 @@ int freeze_super(struct super_block *sb)
 
 	/* Now wait for internal filesystem counter */
 	sb->s_writers.frozen = SB_FREEZE_FS;
-	smp_wmb();
 	sb_wait_write(sb, SB_FREEZE_FS);
 
 	if (sb->s_op->freeze_fs) {
@@ -1350,8 +1301,8 @@ int freeze_super(struct super_block *sb)
 		if (ret) {
 			printk(KERN_ERR
 				"VFS:Filesystem freeze failed\n");
-			sb->s_writers.frozen = SB_UNFROZEN;
 			sb_unlock_frozen(sb);
+			sb->s_writers.frozen = SB_UNFROZEN;
 			deactivate_locked_super(sb);
 			return ret;
 		}
@@ -1386,6 +1337,8 @@ int thaw_super(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		goto out;
 
+	sb_lockdep_acquire(sb);
+
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
 		if (error) {
@@ -1396,9 +1349,9 @@ int thaw_super(struct super_block *sb)
 		}
 	}
 
+	sb_unlock_frozen(sb);
 out:
 	sb->s_writers.frozen = SB_UNFROZEN;
-	sb_unlock_frozen(sb);
 	deactivate_locked_super(sb);
 
 	return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e..314e2d0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_FS_H
 #define _LINUX_FS_H
 
-
 #include <linux/linkage.h>
 #include <linux/wait.h>
 #include <linux/kdev_t.h>
@@ -30,6 +29,7 @@
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
+#include <linux/percpu-rwsem.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1246,16 +1246,8 @@ enum {
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-	/* Counters for counting writers at each level */
-	struct percpu_counter	counter[SB_FREEZE_LEVELS];
-	wait_queue_head_t	wait;		/* queue for waiting for
-						   writers / faults to finish */
-	int			frozen;		/* Is sb frozen? */
-	wait_queue_head_t	wait_unfrozen;	/* queue for waiting for
-						   sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	lock_map[SB_FREEZE_LEVELS];
-#endif
+	int				frozen;		/* Is sb frozen? */
+	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };
 
 struct super_block {
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ