lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130301050029.GB4452@thunk.org>
Date:	Fri, 1 Mar 2013 00:00:29 -0500
From:	Theodore Ts'o <tytso@....edu>
To:	Dave Jones <davej@...hat.com>,
	"gnehzuil.liu" <gnehzuil.liu@...il.com>,
	Zheng Liu <wenqing.lz@...bao.com>,
	"linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: [PATCH] ext4: optimize ext4_es_shrink()

When the system is under memory pressure, ext4_es_srhink() will get
called very often.  So optimize returning the number of items in the
file system's extent status cache by keeping a per-filesystem count,
instead of calculating it each time by scanning all of the inodes in
the extent status cache.

Also rename the slab used for the extent status cache to be
"ext4_extent_status" so it's obviousl the slab in question is created
by ext4.

Signed-off-by: "Theodore Ts'o" <tytso@....edu>
Cc: Zheng Liu <gnehzuil.liu@...il.com>
---
 fs/ext4/ext4.h              |  1 +
 fs/ext4/extents_status.c    | 39 +++++++++++++--------------------------
 include/trace/events/ext4.h | 40 ++++++++++++----------------------------
 3 files changed, 26 insertions(+), 54 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6e16c18..96c1093 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1268,6 +1268,7 @@ struct ext4_sb_info {
 	atomic_t s_mb_preallocated;
 	atomic_t s_mb_discarded;
 	atomic_t s_lock_busy;
+	atomic_t s_extent_cache_cnt;
 
 	/* locality groups */
 	struct ext4_locality_group __percpu *s_locality_groups;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index f768f4a..27fcdd2 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -147,11 +147,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_lblk_t end);
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 				       int nr_to_scan);
-static int ext4_es_reclaim_extents_count(struct super_block *sb);
 
 int __init ext4_init_es(void)
 {
-	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
+	ext4_es_cachep = kmem_cache_create("ext4_extent_status",
+					   sizeof(struct extent_status),
+					   0, (SLAB_RECLAIM_ACCOUNT), NULL);
 	if (ext4_es_cachep == NULL)
 		return -ENOMEM;
 	return 0;
@@ -302,8 +303,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	/*
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
-	if (!ext4_es_is_delayed(es))
+	if (!ext4_es_is_delayed(es)) {
 		EXT4_I(inode)->i_es_lru_nr++;
+		atomic_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+	}
 
 	return es;
 }
@@ -314,6 +317,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	if (!ext4_es_is_delayed(es)) {
 		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
 		EXT4_I(inode)->i_es_lru_nr--;
+		atomic_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -674,10 +678,11 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk = 0;
 
-	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
+	ret = atomic_read(&sbi->s_extent_cache_cnt);
+	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
-		return ext4_es_reclaim_extents_count(sbi->s_sb);
+		return ret;
 
 	INIT_LIST_HEAD(&scanned);
 
@@ -705,9 +710,10 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	}
 	list_splice_tail(&scanned, &sbi->s_es_lru);
 	spin_unlock(&sbi->s_es_lru_lock);
-	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
 
-	return ext4_es_reclaim_extents_count(sbi->s_sb);
+	ret = atomic_read(&sbi->s_extent_cache_cnt);
+	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+	return ret;
 }
 
 void ext4_es_register_shrinker(struct super_block *sb)
@@ -751,25 +757,6 @@ void ext4_es_lru_del(struct inode *inode)
 	spin_unlock(&sbi->s_es_lru_lock);
 }
 
-static int ext4_es_reclaim_extents_count(struct super_block *sb)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_inode_info *ei;
-	struct list_head *cur;
-	int nr_cached = 0;
-
-	spin_lock(&sbi->s_es_lru_lock);
-	list_for_each(cur, &sbi->s_es_lru) {
-		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
-		read_lock(&ei->i_es_lock);
-		nr_cached += ei->i_es_lru_nr;
-		read_unlock(&ei->i_es_lock);
-	}
-	spin_unlock(&sbi->s_es_lru_lock);
-	trace_ext4_es_reclaim_extents_count(sb, nr_cached);
-	return nr_cached;
-}
-
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 				       int nr_to_scan)
 {
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index c0457c0..4ee4710 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2255,64 +2255,48 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
 		  __entry->found ? __entry->status : 0)
 );
 
-TRACE_EVENT(ext4_es_reclaim_extents_count,
-	TP_PROTO(struct super_block *sb, int nr_cached),
-
-	TP_ARGS(sb, nr_cached),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	int,	nr_cached		)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= sb->s_dev;
-		__entry->nr_cached	= nr_cached;
-	),
-
-	TP_printk("dev %d,%d cached objects nr %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->nr_cached)
-);
-
 TRACE_EVENT(ext4_es_shrink_enter,
-	TP_PROTO(struct super_block *sb, int nr_to_scan),
+	TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
-	TP_ARGS(sb, nr_to_scan),
+	TP_ARGS(sb, nr_to_scan, cache_cnt),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	int,	nr_to_scan		)
+		__field(	int,	cache_cnt		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
 		__entry->nr_to_scan	= nr_to_scan;
+		__entry->cache_cnt	= cache_cnt;
 	),
 
-	TP_printk("dev %d,%d nr to scan %d",
+	TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->nr_to_scan)
+		  __entry->nr_to_scan, __entry->cache_cnt)
 );
 
 TRACE_EVENT(ext4_es_shrink_exit,
-	TP_PROTO(struct super_block *sb, int shrunk_nr),
+	TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
 
-	TP_ARGS(sb, shrunk_nr),
+	TP_ARGS(sb, shrunk_nr, cache_cnt),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	int,	shrunk_nr		)
+		__field(	int,	cache_cnt		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
 		__entry->shrunk_nr	= shrunk_nr;
+		__entry->cache_cnt	= cache_cnt;
 	),
 
-	TP_printk("dev %d,%d nr to scan %d",
+	TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->shrunk_nr)
+		  __entry->shrunk_nr, __entry->cache_cnt)
 );
 
 #endif /* _TRACE_EXT4_H */
-- 
1.7.12.rc0.22.gcdd159b

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ