Date:	Wed, 29 Sep 2010 22:18:46 +1000
From:	Dave Chinner <david@...morbit.com>
To:	linux-fsdevel@...r.kernel.org
Cc:	linux-kernel@...r.kernel.org
Subject: [PATCH 14/17] fs: Inode counters do not need to be atomic.

From: Nick Piggin <npiggin@...e.de>

Atomic counters do not scale on large machines, so convert the inode
counters back to plain variables protected by spinlocks. We can do this
because each counter is updated alongside a specific list operation that
is already protected by a lock: nr_inodes can be protected by
sb_inode_list_lock, and nr_unused by wb_inode_list_lock.
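
[Editor's note, not part of the patch: the sketch below is a minimal
userspace illustration of the pattern, not kernel code. A pthread
spinlock stands in for wb_inode_list_lock, and the names unused_lock,
unused_list, unused_add() and unused_del() are hypothetical. The point
is that when a counter is only ever modified under the same lock that
already serializes the associated list manipulation, it can be a plain
integer rather than an atomic_t, avoiding a second contended atomic
operation per list update.]

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for wb_inode_list_lock / inode_unused / nr_unused. */
struct node {
	struct node *next;
};

static pthread_spinlock_t unused_lock;
static struct node *unused_list;	/* singly linked, for brevity */
static long nr_unused;			/* plain counter: only touched under unused_lock */

static void unused_add(struct node *n)
{
	pthread_spin_lock(&unused_lock);
	n->next = unused_list;		/* list insertion ...                        */
	unused_list = n;
	nr_unused++;			/* ... and counter update under the same lock */
	pthread_spin_unlock(&unused_lock);
}

static struct node *unused_del(void)
{
	struct node *n;

	pthread_spin_lock(&unused_lock);
	n = unused_list;
	if (n) {
		unused_list = n->next;
		nr_unused--;		/* no atomic_dec() needed */
	}
	pthread_spin_unlock(&unused_lock);
	return n;
}

int main(void)
{
	struct node a, b;

	pthread_spin_init(&unused_lock, PTHREAD_PROCESS_PRIVATE);
	unused_add(&a);
	unused_add(&b);
	unused_del();
	printf("nr_unused = %ld\n", nr_unused);	/* prints 1 */
	return 0;
}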

Signed-off-by: Nick Piggin <npiggin@...e.de>
Signed-off-by: Dave Chinner <dchinner@...hat.com>
---
 fs/fs-writeback.c  |    6 ++----
 fs/inode.c         |   30 ++++++++++++------------------
 include/linux/fs.h |   12 ++++++------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 432a4df..8e390e8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -743,8 +743,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 	wb->last_old_flush = jiffies;
 	nr_pages = global_page_state(NR_FILE_DIRTY) +
 			global_page_state(NR_UNSTABLE_NFS) +
-			(atomic_read(&inodes_stat.nr_inodes) -
-			atomic_read(&inodes_stat.nr_unused));
+			inodes_stat.nr_inodes - inodes_stat.nr_unused;
 
 	if (nr_pages) {
 		struct wb_writeback_work work = {
@@ -1116,8 +1115,7 @@ void writeback_inodes_sb(struct super_block *sb)
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	work.nr_pages = nr_dirty + nr_unstable +
-			(atomic_read(&inodes_stat.nr_inodes) -
-			atomic_read(&inodes_stat.nr_unused));
+			inodes_stat.nr_inodes - inodes_stat.nr_unused;
 
 	bdi_queue_work(sb->s_bdi, &work);
 	wait_for_completion(&done);
diff --git a/fs/inode.c b/fs/inode.c
index 50599d7..d279517 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -139,8 +139,8 @@ static DECLARE_RWSEM(iprune_sem);
  * Statistics gathering..
  */
 struct inodes_stat_t inodes_stat = {
-	.nr_inodes = ATOMIC_INIT(0),
-	.nr_unused = ATOMIC_INIT(0),
+	.nr_inodes = 0,
+	.nr_unused = 0,
 };
 
 static struct kmem_cache *inode_cachep __read_mostly;
@@ -376,7 +376,6 @@ static void dispose_list(struct list_head *head)
 		destroy_inode(inode);
 		nr_disposed++;
 	}
-	atomic_sub(nr_disposed, &inodes_stat.nr_inodes);
 }
 
 /*
@@ -385,7 +384,7 @@ static void dispose_list(struct list_head *head)
 static int invalidate_list(struct list_head *head, struct list_head *dispose)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0;
 
 	next = head->next;
 	for (;;) {
@@ -413,19 +412,17 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 		if (!inode->i_count) {
 			spin_lock(&wb_inode_list_lock);
 			list_del(&inode->i_list);
+			inodes_stat.nr_unused--;
 			spin_unlock(&wb_inode_list_lock);
 			WARN_ON(inode->i_state & I_NEW);
 			inode->i_state |= I_FREEING;
 			spin_unlock(&inode->i_lock);
 			list_add(&inode->i_list, dispose);
-			count++;
 			continue;
 		}
 		spin_unlock(&inode->i_lock);
 		busy = 1;
 	}
-	/* only unused inodes may be cached with i_count zero */
-	atomic_sub(count, &inodes_stat.nr_unused);
 	return busy;
 }
 
@@ -471,7 +468,6 @@ EXPORT_SYMBOL(invalidate_inodes);
 static void prune_icache(int nr_to_scan)
 {
 	LIST_HEAD(freeable);
-	int nr_pruned = 0;
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
@@ -492,7 +488,7 @@ again:
 		if (inode->i_count || (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_list);
 			spin_unlock(&inode->i_lock);
-			atomic_dec(&inodes_stat.nr_unused);
+			inodes_stat.nr_unused--;
 			continue;
 		}
 		if (inode->i_state) {
@@ -518,9 +514,8 @@ again:
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
 		spin_unlock(&inode->i_lock);
-		nr_pruned++;
+		inodes_stat.nr_unused--;
 	}
-	atomic_sub(nr_pruned, &inodes_stat.nr_unused);
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
@@ -552,8 +547,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 			return -1;
 		prune_icache(nr);
 	}
-	return (atomic_read(&inodes_stat.nr_unused) / 100) *
-					sysctl_vfs_cache_pressure;
+	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker icache_shrinker = {
@@ -649,7 +643,7 @@ static inline void
 __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
 			struct inode *inode)
 {
-	atomic_inc(&inodes_stat.nr_inodes);
+	inodes_stat.nr_inodes++;
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	spin_unlock(&sb_inode_list_lock);
 	if (b) {
@@ -1325,9 +1319,9 @@ static void iput_final(struct inode *inode)
 		if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
 			spin_lock(&wb_inode_list_lock);
 			list_move(&inode->i_list, &inode_unused);
+			inodes_stat.nr_unused++;
 			spin_unlock(&wb_inode_list_lock);
 		}
-		atomic_inc(&inodes_stat.nr_unused);
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&sb_inode_list_lock);
@@ -1347,16 +1341,16 @@ static void iput_final(struct inode *inode)
 	if (!list_empty(&inode->i_list)) {
 		spin_lock(&wb_inode_list_lock);
 		list_del_init(&inode->i_list);
-		spin_unlock(&wb_inode_list_lock);
 		if (!inode->i_state)
-			atomic_dec(&inodes_stat.nr_unused);
+			inodes_stat.nr_unused--;
+		spin_unlock(&wb_inode_list_lock);
 	}
 	list_del_init(&inode->i_sb_list);
+	inodes_stat.nr_inodes--;
 	spin_unlock(&sb_inode_list_lock);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
 	spin_unlock(&inode->i_lock);
-	atomic_dec(&inodes_stat.nr_inodes);
 	evict(inode);
 
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 096a5eb..3a43313 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -32,6 +32,12 @@
 #define SEEK_END	2	/* seek relative to end of file */
 #define SEEK_MAX	SEEK_END
 
+struct inodes_stat_t {
+	int nr_inodes;
+	int nr_unused;
+	int dummy[5];		/* padding for sysctl ABI compatibility */
+};
+
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
 	int nr_files;		/* read only */
@@ -410,12 +416,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private, int ret,
 			bool is_async);
 
-struct inodes_stat_t {
-	atomic_t nr_inodes;
-	atomic_t nr_unused;
-	int dummy[5];		/* padding for sysctl ABI compatibility */
-};
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
-- 
1.7.1

