[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1360313046-9876-6-git-send-email-wenqing.lz@taobao.com>
Date: Fri, 8 Feb 2013 16:44:01 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: linux-ext4@...r.kernel.org
Cc: Zheng Liu <wenqing.lz@...bao.com>, "Theodore Ts'o" <tytso@....edu>,
Jan kara <jack@...e.cz>
Subject: [PATCH 05/10 v5] ext4: lookup block mapping in extent status tree
From: Zheng Liu <wenqing.lz@...bao.com>
After tracking all extent status, we already have a extent cache in
memory. Every time we want to lookup a block mapping, we can first
try to lookup it in extent status tree to avoid a potential disk I/O.
A new function called ext4_es_lookup_extent is defined to finish this
work. When we try to lookup a block mapping, we always call
ext4_map_blocks and/or ext4_da_map_blocks. So in these functions we
first try to lookup a block mapping in extent status tree.
A new flag EXT4_GET_BLOCKS_NO_PUT_HOLE is used in ext4_da_map_blocks
in order not to put a hole into extent status tree because this hole
will be converted to delayed extent in the tree immediately.
Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
Cc: "Theodore Ts'o" <tytso@....edu>
Cc: Jan kara <jack@...e.cz>
---
fs/ext4/ext4.h | 2 ++
fs/ext4/extents.c | 7 ++++-
fs/ext4/extents_status.c | 59 +++++++++++++++++++++++++++++++++++++++++
fs/ext4/extents_status.h | 1 +
fs/ext4/inode.c | 64 +++++++++++++++++++++++++++++++++++++++++++--
include/trace/events/ext4.h | 56 +++++++++++++++++++++++++++++++++++++++
6 files changed, 186 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8462eb3..ad885b5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -582,6 +582,8 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
+ /* Do not put hole in extent cache */
+#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
/*
* Flags used by ext4_free_blocks
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4b065ff..1be8955 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2154,6 +2154,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block,
le32_to_cpu(ex->ee_block),
ext4_ext_get_actual_len(ex));
+ ext4_es_insert_extent(inode, lblock, len, ~0,
+ EXTENT_STATUS_HOLE);
} else if (block >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
@@ -2167,6 +2169,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block);
BUG_ON(next == lblock);
len = next - lblock;
+ ext4_es_insert_extent(inode, lblock, len, ~0,
+ EXTENT_STATUS_HOLE);
} else {
lblock = len = 0;
BUG();
@@ -4006,7 +4010,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* put just found gap into cache to speed up
* subsequent requests
*/
- ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+ if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
+ ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
goto out2;
}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 71cb75a..ca7dc9f 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -468,6 +468,65 @@ error:
return err;
}
+/*
+ * ext4_es_lookup_extent() looks up an extent in extent status tree.
+ *
+ * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
+ *
+ * Return: 1 on found, 0 on not
+ */
+int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es)
+{
+ struct ext4_es_tree *tree;
+ struct extent_status *es1 = NULL;
+ struct rb_node *node;
+ int found = 0;
+
+ trace_ext4_es_lookup_extent_enter(inode, es->es_lblk);
+ es_debug("lookup extent in block %u\n", es->es_lblk);
+
+ tree = &EXT4_I(inode)->i_es_tree;
+ read_lock(&EXT4_I(inode)->i_es_lock);
+
+ /* find extent in cache firstly */
+ es->es_len = es->es_pblk = 0;
+ if (tree->cache_es) {
+ es1 = tree->cache_es;
+ if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u)\n",
+ es->es_lblk, es1->es_lblk, es1->es_len);
+ found = 1;
+ goto out;
+ }
+ }
+
+ node = tree->root.rb_node;
+ while (node) {
+ es1 = rb_entry(node, struct extent_status, rb_node);
+ if (es->es_lblk < es1->es_lblk)
+ node = node->rb_left;
+ else if (es->es_lblk > ext4_es_end(es1))
+ node = node->rb_right;
+ else {
+ found = 1;
+ break;
+ }
+ }
+
+out:
+ if (found) {
+ BUG_ON(!es1);
+ es->es_lblk = es1->es_lblk;
+ es->es_len = es1->es_len;
+ es->es_pblk = es1->es_pblk;
+ }
+
+ read_unlock(&EXT4_I(inode)->i_es_lock);
+
+ trace_ext4_es_lookup_extent_exit(inode, es, found);
+ return found;
+}
+
static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
ext4_lblk_t end)
{
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index b5788eb..effe78c 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -53,6 +53,7 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern ext4_lblk_t ext4_es_find_delayed_extent(struct inode *inode,
struct extent_status *es);
+extern int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es);
static inline int ext4_es_is_written(struct extent_status *es)
{
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 16454fc..670779a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -508,12 +508,34 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags)
{
+ struct extent_status es;
int retval;
map->m_flags = 0;
ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
"logical block %lu\n", inode->i_ino, flags, map->m_len,
(unsigned long) map->m_lblk);
+
+ /* Lookup extent status tree firstly */
+ es.es_lblk = map->m_lblk;
+ if (ext4_es_lookup_extent(inode, &es)) {
+ if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
+ map->m_pblk = ext4_es_pblock(&es) +
+ map->m_lblk - es.es_lblk;
+ map->m_flags |= ext4_es_is_written(&es) ?
+ EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
+ } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+ retval = 0;
+ } else {
+ BUG_ON(1);
+ }
+ goto found;
+ }
+
/*
* Try to see if we can get the block without requesting a new
* file system block.
@@ -541,6 +563,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem));
+found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, map);
if (ret != 0)
@@ -1772,6 +1795,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
struct ext4_map_blocks *map,
struct buffer_head *bh)
{
+ struct extent_status es;
int retval;
sector_t invalid_block = ~((sector_t) 0xffff);
@@ -1782,6 +1806,39 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
"logical block %lu\n", inode->i_ino, map->m_len,
(unsigned long) map->m_lblk);
+
+ /* Lookup extent status tree firstly */
+ es.es_lblk = iblock;
+ if (ext4_es_lookup_extent(inode, &es)) {
+
+ if (ext4_es_is_hole(&es)) {
+ retval = 0;
+ down_read((&EXT4_I(inode)->i_data_sem));
+ goto add_delayed;
+ }
+
+ if (ext4_es_is_delayed(&es)) {
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+ return 0;
+ }
+
+ map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+ retval = es.es_len - (iblock - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
+ if (ext4_es_is_written(&es))
+ map->m_flags |= EXT4_MAP_MAPPED;
+ else if (ext4_es_is_unwritten(&es))
+ map->m_flags |= EXT4_MAP_UNWRITTEN;
+ else
+ BUG_ON(1);
+
+ return retval;
+ }
+
/*
* Try to see if we can get the block without requesting a new
* file system block.
@@ -1800,10 +1857,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
retval = 0;
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+ retval = ext4_ext_map_blocks(NULL, inode, map,
+ EXT4_GET_BLOCKS_NO_PUT_HOLE);
else
- retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+ retval = ext4_ind_map_blocks(NULL, inode, map,
+ EXT4_GET_BLOCKS_NO_PUT_HOLE);
+add_delayed:
if (retval == 0) {
int ret;
/*
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d278ced..822780a 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2177,6 +2177,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit,
__entry->pblk, __entry->status, __entry->ret)
);
+TRACE_EVENT(ext4_es_lookup_extent_enter,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
+
+ TP_ARGS(inode, lblk),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ext4_lblk_t, lblk )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->lblk = lblk;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->lblk)
+);
+
+TRACE_EVENT(ext4_es_lookup_extent_exit,
+ TP_PROTO(struct inode *inode, struct extent_status *es,
+ int found),
+
+ TP_ARGS(inode, es, found),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ext4_lblk_t, lblk )
+ __field( ext4_lblk_t, len )
+ __field( ext4_fsblk_t, pblk )
+ __field( unsigned long long, status )
+ __field( int, found )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->lblk = es->es_lblk;
+ __entry->len = es->es_len;
+ __entry->pblk = ext4_es_pblock(es);
+ __entry->status = ext4_es_status(es);
+ __entry->found = found;
+ ),
+
+ TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->found,
+ __entry->lblk, __entry->len,
+ __entry->found ? __entry->pblk : 0,
+ __entry->found ? __entry->status : 0)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
--
1.7.12.rc2.18.g61b472e
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists