Message-Id: <1407382553-24256-6-git-send-email-wenqing.lz@taobao.com>
Date: Thu, 7 Aug 2014 11:35:52 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: linux-ext4@...r.kernel.org
Cc: "Theodore Ts'o" <tytso@....edu>,
Andreas Dilger <adilger.kernel@...ger.ca>,
Jan Kara <jack@...e.cz>, Zheng Liu <wenqing.lz@...bao.com>
Subject: [PATCH v3 5/6] ext4: use a list to track all reclaimable objects for extent status tree
From: Zheng Liu <wenqing.lz@...bao.com>
Currently the shrinker can take a long time to reclaim objects from an
extent status tree when the tree contains a large number of delayed
objects. These objects cannot be reclaimed because ext4 needs them for
seeking data/hole, finding delayed ranges, and other work. When an
rb-tree holds many delayed objects, the shrinker has to scan all of them
just to skip them, so the latency is high. This commit uses a list to
track all reclaimable objects in order to reduce the latency when the
shrinker tries to reclaim objects from an extent status tree.
Cc: "Theodore Ts'o" <tytso@....edu>
Cc: Andreas Dilger <adilger.kernel@...ger.ca>
Cc: Jan Kara <jack@...e.cz>
Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
---
fs/ext4/extents_status.c | 74 ++++++++++++++++++++++++++--------------------
fs/ext4/extents_status.h | 2 ++
2 files changed, 44 insertions(+), 32 deletions(-)
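For readers less familiar with the shrinker path, here is a minimal
user-space sketch (not the kernel code itself; struct es_node,
es_insert() and es_reclaim() are made-up names) of the pattern this
patch introduces: reclaimable entries live on their own list next to
the lookup tree, so the scan cost is bounded by nr_to_scan rather than
by the number of delayed entries that have to be skipped.

/*
 * Minimal user-space sketch of the idea in this patch: besides the
 * lookup structure that holds every extent, keep a doubly-linked list
 * that contains only the reclaimable (non-delayed) entries.  The
 * shrinker walks that list and stops after nr_to_scan entries instead
 * of scanning the whole tree and skipping the delayed ones.
 * All names here are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct es_node {
	int		 lblk;		/* stands in for the rb-tree key */
	bool		 delayed;	/* delayed entries are not reclaimable */
	struct es_node	*prev, *next;	/* links on the reclaimable list */
};

/* Head of the per-inode list of reclaimable (non-delayed) entries. */
static struct es_node reclaim_list = {
	.prev = &reclaim_list, .next = &reclaim_list,
};

static void list_add_tail_node(struct es_node *n, struct es_node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_del_node(struct es_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* Allocate an entry; only non-delayed ones go on the reclaimable list. */
static struct es_node *es_insert(int lblk, bool delayed)
{
	struct es_node *n = calloc(1, sizeof(*n));

	if (!n)
		return NULL;
	n->lblk = lblk;
	n->delayed = delayed;
	if (!delayed)
		list_add_tail_node(n, &reclaim_list);
	return n;
}

/*
 * Reclaim up to *nr_to_scan entries.  The cost depends only on the
 * number of reclaimable entries visited, no matter how many delayed
 * entries exist elsewhere in the tree.
 */
static int es_reclaim(int *nr_to_scan)
{
	int nr_shrunk = 0;

	while (reclaim_list.next != &reclaim_list && *nr_to_scan > 0) {
		struct es_node *n = reclaim_list.next;

		list_del_node(n);	/* the real code also does rb_erase() */
		free(n);
		nr_shrunk++;
		(*nr_to_scan)--;
	}
	return nr_shrunk;
}

int main(void)
{
	int nr_to_scan = 2;

	es_insert(0, true);	/* delayed: never put on the list */
	es_insert(8, false);
	es_insert(16, false);
	es_insert(24, false);

	printf("reclaimed %d entries, %d scan budget left\n",
	       es_reclaim(&nr_to_scan), nr_to_scan);
	return 0;
}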
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 8d76fd5..2f81d1e 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -148,7 +148,8 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
- int nr_to_scan);
+ struct list_head *freeable,
+ int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei);
@@ -171,6 +172,7 @@ void ext4_exit_es(void)
void ext4_es_init_tree(struct ext4_es_tree *tree)
{
tree->root = RB_ROOT;
+ INIT_LIST_HEAD(&tree->list);
tree->cache_es = NULL;
}
@@ -302,6 +304,7 @@ static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
ext4_fsblk_t pblk)
{
+ struct ext4_inode_info *ei = EXT4_I(inode);
struct extent_status *es;
es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
if (es == NULL)
@@ -314,12 +317,13 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
* We don't count delayed extent because we never try to reclaim them
*/
if (!ext4_es_is_delayed(es)) {
- EXT4_I(inode)->i_es_shk_nr++;
+ list_add_tail(&es->list, &ei->i_es_tree.list);
+ ei->i_es_shk_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_shk_cnt);
}
- EXT4_I(inode)->i_es_all_nr++;
+ ei->i_es_all_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
return es;
@@ -327,13 +331,15 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
- EXT4_I(inode)->i_es_all_nr--;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ ei->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
/* Decrease the shrink counter when this es is not delayed */
if (!ext4_es_is_delayed(es)) {
- BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
- EXT4_I(inode)->i_es_shk_nr--;
+ list_del(&es->list);
+ BUG_ON(ei->i_es_shk_nr-- == 0);
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
s_es_stats.es_stats_shk_cnt);
}
@@ -958,11 +964,24 @@ void ext4_es_list_del(struct inode *inode)
spin_unlock(&sbi->s_es_lock);
}
+static void dispose_list(struct inode *inode, struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct extent_status *es;
+
+ es = list_first_entry(head, struct extent_status, list);
+ list_del_init(&es->list);
+
+ ext4_es_free_extent(inode, es);
+ }
+}
+
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei)
{
struct ext4_inode_info *ei;
struct ext4_es_stats *es_stats;
+ LIST_HEAD(freeable);
ktime_t start_time;
u64 scan_time;
int nr_to_walk;
@@ -976,8 +995,6 @@ retry:
spin_lock(&sbi->s_es_lock);
nr_to_walk = sbi->s_es_nr_inode;
while (!list_empty(&sbi->s_es_list) && nr_to_walk-- > 0) {
- int shrunk;
-
ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
i_es_list);
@@ -1009,11 +1026,10 @@ retry:
continue;
}
- shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
+ nr_shrunk += __es_try_to_reclaim_extents(ei, &freeable,
+ &nr_to_scan);
write_unlock(&ei->i_es_lock);
-
- nr_shrunk += shrunk;
- nr_to_scan -= shrunk;
+ dispose_list(&ei->vfs_inode, &freeable);
if (nr_to_scan == 0)
goto out;
@@ -1030,8 +1046,11 @@ retry:
goto retry;
}
- if (locked_ei && nr_shrunk == 0)
- nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+ if (locked_ei && nr_shrunk == 0) {
+ nr_shrunk = __es_try_to_reclaim_extents(locked_ei, &freeable,
+ &nr_to_scan);
+ dispose_list(&locked_ei->vfs_inode, &freeable);
+ }
out:
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
@@ -1227,12 +1246,12 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
}
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
- int nr_to_scan)
+ struct list_head *freeable,
+ int *nr_to_scan)
{
struct inode *inode = &ei->vfs_inode;
struct ext4_es_tree *tree = &ei->i_es_tree;
- struct rb_node *node;
- struct extent_status *es;
+ struct extent_status *es, *tmp;
unsigned long nr_shrunk = 0;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -1244,21 +1263,12 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
__ratelimit(&_rs))
ext4_warning(inode->i_sb, "forced shrink of precached extents");
- node = rb_first(&tree->root);
- while (node != NULL) {
- es = rb_entry(node, struct extent_status, rb_node);
- node = rb_next(&es->rb_node);
- /*
- * We can't reclaim delayed extent from status tree because
- * fiemap, bigallic, and seek_data/hole need to use it.
- */
- if (!ext4_es_is_delayed(es)) {
- rb_erase(&es->rb_node, &tree->root);
- ext4_es_free_extent(inode, es);
- nr_shrunk++;
- if (--nr_to_scan == 0)
- break;
- }
+ list_for_each_entry_safe(es, tmp, &tree->list, list) {
+ rb_erase(&es->rb_node, &tree->root);
+ list_move_tail(&es->list, freeable);
+ nr_shrunk++;
+ if (--*nr_to_scan == 0)
+ break;
}
tree->cache_es = NULL;
return nr_shrunk;
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 0e6a33e..a45c7fe 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -54,6 +54,7 @@ struct ext4_extent;
struct extent_status {
struct rb_node rb_node;
+ struct list_head list;
ext4_lblk_t es_lblk; /* first logical block extent covers */
ext4_lblk_t es_len; /* length of extent in block */
ext4_fsblk_t es_pblk; /* first physical block */
@@ -61,6 +62,7 @@ struct extent_status {
struct ext4_es_tree {
struct rb_root root;
+ struct list_head list;
struct extent_status *cache_es; /* recently accessed extent */
};
--
1.7.9.7