[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130212123142.GC19583@quack.suse.cz>
Date: Tue, 12 Feb 2013 13:31:42 +0100
From: Jan Kara <jack@...e.cz>
To: Zheng Liu <gnehzuil.liu@...il.com>
Cc: linux-ext4@...r.kernel.org, Zheng Liu <wenqing.lz@...bao.com>,
Theodore Ts'o <tytso@....edu>, Jan kara <jack@...e.cz>
Subject: Re: [PATCH 05/10 v5] ext4: lookup block mapping in extent status
tree
On Fri 08-02-13 16:44:01, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@...bao.com>
>
> After tracking all extent status, we already have a extent cache in
> memory. Every time we want to lookup a block mapping, we can first
> try to lookup it in extent status tree to avoid a potential disk I/O.
>
> A new function called ext4_es_lookup_extent is defined to finish this
> work. When we try to lookup a block mapping, we always call
> ext4_map_blocks and/or ext4_da_map_blocks. So in these functions we
> first try to lookup a block mapping in extent status tree.
>
> A new flag EXT4_GET_BLOCKS_NO_PUT_HOLE is used in ext4_da_map_blocks
> in order not to put a hole into extent status tree because this hole
> will be converted to delayed extent in the tree immediately.
It looks somewhat inconsistent that you put hole into the extent tree in
ext4_ext_map_blocks() but all other extent types are handled in
ext4_map_blocks() or ext4_da_map_blocks(). Can we put the handling in one
place?
Otherwise the patch looks OK.
Honza
> Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
> Cc: "Theodore Ts'o" <tytso@....edu>
> Cc: Jan kara <jack@...e.cz>
> ---
> fs/ext4/ext4.h | 2 ++
> fs/ext4/extents.c | 7 ++++-
> fs/ext4/extents_status.c | 59 +++++++++++++++++++++++++++++++++++++++++
> fs/ext4/extents_status.h | 1 +
> fs/ext4/inode.c | 64 +++++++++++++++++++++++++++++++++++++++++++--
> include/trace/events/ext4.h | 56 +++++++++++++++++++++++++++++++++++++++
> 6 files changed, 186 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 8462eb3..ad885b5 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -582,6 +582,8 @@ enum {
> #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
> /* Do not take i_data_sem locking in ext4_map_blocks */
> #define EXT4_GET_BLOCKS_NO_LOCK 0x0100
> + /* Do not put hole in extent cache */
> +#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
>
> /*
> * Flags used by ext4_free_blocks
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 4b065ff..1be8955 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2154,6 +2154,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
> block,
> le32_to_cpu(ex->ee_block),
> ext4_ext_get_actual_len(ex));
> + ext4_es_insert_extent(inode, lblock, len, ~0,
> + EXTENT_STATUS_HOLE);
> } else if (block >= le32_to_cpu(ex->ee_block)
> + ext4_ext_get_actual_len(ex)) {
> ext4_lblk_t next;
> @@ -2167,6 +2169,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
> block);
> BUG_ON(next == lblock);
> len = next - lblock;
> + ext4_es_insert_extent(inode, lblock, len, ~0,
> + EXTENT_STATUS_HOLE);
> } else {
> lblock = len = 0;
> BUG();
> @@ -4006,7 +4010,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
> * put just found gap into cache to speed up
> * subsequent requests
> */
> - ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
> + if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
> + ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
> goto out2;
> }
>
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index 71cb75a..ca7dc9f 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -468,6 +468,65 @@ error:
> return err;
> }
>
> +/*
> + * ext4_es_lookup_extent() looks up an extent in extent status tree.
> + *
> + * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
> + *
> + * Return: 1 on found, 0 on not
> + */
> +int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es)
> +{
> + struct ext4_es_tree *tree;
> + struct extent_status *es1 = NULL;
> + struct rb_node *node;
> + int found = 0;
> +
> + trace_ext4_es_lookup_extent_enter(inode, es->es_lblk);
> + es_debug("lookup extent in block %u\n", es->es_lblk);
> +
> + tree = &EXT4_I(inode)->i_es_tree;
> + read_lock(&EXT4_I(inode)->i_es_lock);
> +
> + /* find extent in cache firstly */
> + es->es_len = es->es_pblk = 0;
> + if (tree->cache_es) {
> + es1 = tree->cache_es;
> + if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) {
> + es_debug("%u cached by [%u/%u)\n",
> + es->es_lblk, es1->es_lblk, es1->es_len);
> + found = 1;
> + goto out;
> + }
> + }
> +
> + node = tree->root.rb_node;
> + while (node) {
> + es1 = rb_entry(node, struct extent_status, rb_node);
> + if (es->es_lblk < es1->es_lblk)
> + node = node->rb_left;
> + else if (es->es_lblk > ext4_es_end(es1))
> + node = node->rb_right;
> + else {
> + found = 1;
> + break;
> + }
> + }
> +
> +out:
> + if (found) {
> + BUG_ON(!es1);
> + es->es_lblk = es1->es_lblk;
> + es->es_len = es1->es_len;
> + es->es_pblk = es1->es_pblk;
> + }
> +
> + read_unlock(&EXT4_I(inode)->i_es_lock);
> +
> + trace_ext4_es_lookup_extent_exit(inode, es, found);
> + return found;
> +}
> +
> static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
> ext4_lblk_t end)
> {
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index b5788eb..effe78c 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -53,6 +53,7 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
> ext4_lblk_t len);
> extern ext4_lblk_t ext4_es_find_delayed_extent(struct inode *inode,
> struct extent_status *es);
> +extern int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es);
>
> static inline int ext4_es_is_written(struct extent_status *es)
> {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 16454fc..670779a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -508,12 +508,34 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
> int ext4_map_blocks(handle_t *handle, struct inode *inode,
> struct ext4_map_blocks *map, int flags)
> {
> + struct extent_status es;
> int retval;
>
> map->m_flags = 0;
> ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
> "logical block %lu\n", inode->i_ino, flags, map->m_len,
> (unsigned long) map->m_lblk);
> +
> + /* Lookup extent status tree firstly */
> + es.es_lblk = map->m_lblk;
> + if (ext4_es_lookup_extent(inode, &es)) {
> + if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
> + map->m_pblk = ext4_es_pblock(&es) +
> + map->m_lblk - es.es_lblk;
> + map->m_flags |= ext4_es_is_written(&es) ?
> + EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
> + retval = es.es_len - (map->m_lblk - es.es_lblk);
> + if (retval > map->m_len)
> + retval = map->m_len;
> + map->m_len = retval;
> + } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
> + retval = 0;
> + } else {
> + BUG_ON(1);
> + }
> + goto found;
> + }
> +
> /*
> * Try to see if we can get the block without requesting a new
> * file system block.
> @@ -541,6 +563,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
> if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
> up_read((&EXT4_I(inode)->i_data_sem));
>
> +found:
> if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
> int ret = check_block_validity(inode, map);
> if (ret != 0)
> @@ -1772,6 +1795,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> struct ext4_map_blocks *map,
> struct buffer_head *bh)
> {
> + struct extent_status es;
> int retval;
> sector_t invalid_block = ~((sector_t) 0xffff);
>
> @@ -1782,6 +1806,39 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
> "logical block %lu\n", inode->i_ino, map->m_len,
> (unsigned long) map->m_lblk);
> +
> + /* Lookup extent status tree firstly */
> + es.es_lblk = iblock;
> + if (ext4_es_lookup_extent(inode, &es)) {
> +
> + if (ext4_es_is_hole(&es)) {
> + retval = 0;
> + down_read((&EXT4_I(inode)->i_data_sem));
> + goto add_delayed;
> + }
> +
> + if (ext4_es_is_delayed(&es)) {
> + map_bh(bh, inode->i_sb, invalid_block);
> + set_buffer_new(bh);
> + set_buffer_delay(bh);
> + return 0;
> + }
> +
> + map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
> + retval = es.es_len - (iblock - es.es_lblk);
> + if (retval > map->m_len)
> + retval = map->m_len;
> + map->m_len = retval;
> + if (ext4_es_is_written(&es))
> + map->m_flags |= EXT4_MAP_MAPPED;
> + else if (ext4_es_is_unwritten(&es))
> + map->m_flags |= EXT4_MAP_UNWRITTEN;
> + else
> + BUG_ON(1);
> +
> + return retval;
> + }
> +
> /*
> * Try to see if we can get the block without requesting a new
> * file system block.
> @@ -1800,10 +1857,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> map->m_flags |= EXT4_MAP_FROM_CLUSTER;
> retval = 0;
> } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
> - retval = ext4_ext_map_blocks(NULL, inode, map, 0);
> + retval = ext4_ext_map_blocks(NULL, inode, map,
> + EXT4_GET_BLOCKS_NO_PUT_HOLE);
> else
> - retval = ext4_ind_map_blocks(NULL, inode, map, 0);
> + retval = ext4_ind_map_blocks(NULL, inode, map,
> + EXT4_GET_BLOCKS_NO_PUT_HOLE);
>
> +add_delayed:
> if (retval == 0) {
> int ret;
> /*
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index d278ced..822780a 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2177,6 +2177,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit,
> __entry->pblk, __entry->status, __entry->ret)
> );
>
> +TRACE_EVENT(ext4_es_lookup_extent_enter,
> + TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
> +
> + TP_ARGS(inode, lblk),
> +
> + TP_STRUCT__entry(
> + __field( dev_t, dev )
> + __field( ino_t, ino )
> + __field( ext4_lblk_t, lblk )
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = inode->i_sb->s_dev;
> + __entry->ino = inode->i_ino;
> + __entry->lblk = lblk;
> + ),
> +
> + TP_printk("dev %d,%d ino %lu lblk %u",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + (unsigned long) __entry->ino, __entry->lblk)
> +);
> +
> +TRACE_EVENT(ext4_es_lookup_extent_exit,
> + TP_PROTO(struct inode *inode, struct extent_status *es,
> + int found),
> +
> + TP_ARGS(inode, es, found),
> +
> + TP_STRUCT__entry(
> + __field( dev_t, dev )
> + __field( ino_t, ino )
> + __field( ext4_lblk_t, lblk )
> + __field( ext4_lblk_t, len )
> + __field( ext4_fsblk_t, pblk )
> + __field( unsigned long long, status )
> + __field( int, found )
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = inode->i_sb->s_dev;
> + __entry->ino = inode->i_ino;
> + __entry->lblk = es->es_lblk;
> + __entry->len = es->es_len;
> + __entry->pblk = ext4_es_pblock(es);
> + __entry->status = ext4_es_status(es);
> + __entry->found = found;
> + ),
> +
> + TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + (unsigned long) __entry->ino, __entry->found,
> + __entry->lblk, __entry->len,
> + __entry->found ? __entry->pblk : 0,
> + __entry->found ? __entry->status : 0)
> +);
> +
> #endif /* _TRACE_EXT4_H */
>
> /* This part must be outside protection */
> --
> 1.7.12.rc2.18.g61b472e
>
--
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists