[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130208153900.GB10226@quack.suse.cz>
Date: Fri, 8 Feb 2013 16:39:00 +0100
From: Jan Kara <jack@...e.cz>
To: Zheng Liu <gnehzuil.liu@...il.com>
Cc: linux-ext4@...r.kernel.org, Zheng Liu <wenqing.lz@...bao.com>,
Theodore Ts'o <tytso@....edu>, Jan kara <jack@...e.cz>
Subject: Re: [PATCH 02/10 v5] ext4: add physical block and status member
into extent status tree
On Fri 08-02-13 16:43:58, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@...bao.com>
>
> This commit adds two members in extent_status structure to let it record
> physical block and extent status. Here es_pblk is used to record both
> of them because physical block only has 48 bits. So extent status could
> be stashed into it so that we can save some memory. Now written,
> unwritten, delayed and hole are defined as status.
>
> Due to new member is added into extent status tree, all interfaces need
> to be adjusted.
The patch looks good. You can add:
Reviewed-by: Jan Kara <jack@...e.cz>
Honza
>
> Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
> Cc: "Theodore Ts'o" <tytso@....edu>
> Cc: Jan kara <jack@...e.cz>
> ---
> fs/ext4/extents_status.c | 67 +++++++++++++++++++++++++++++++++++++--------
> fs/ext4/extents_status.h | 64 ++++++++++++++++++++++++++++++++++++++++++-
> fs/ext4/inode.c | 3 +-
> include/trace/events/ext4.h | 34 +++++++++++++++--------
> 4 files changed, 142 insertions(+), 26 deletions(-)
>
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index aa4d346..5093cee 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -179,7 +179,9 @@ static void ext4_es_print_tree(struct inode *inode)
> while (node) {
> struct extent_status *es;
> es = rb_entry(node, struct extent_status, rb_node);
> - printk(KERN_DEBUG " [%u/%u)", es->es_lblk, es->es_len);
> + printk(KERN_DEBUG " [%u/%u) %llu %llx",
> + es->es_lblk, es->es_len,
> + ext4_es_pblock(es), ext4_es_status(es));
> node = rb_next(node);
> }
> printk(KERN_DEBUG "\n");
> @@ -234,7 +236,7 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
> * @es: delayed extent that we found
> *
> * Returns the first block of the next extent after es, otherwise
> - * EXT_MAX_BLOCKS if no delay extent is found.
> + * EXT_MAX_BLOCKS if no extent is found.
> * Delayed extent is returned via @es.
> */
> ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
> @@ -249,17 +251,18 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
> read_lock(&EXT4_I(inode)->i_es_lock);
> tree = &EXT4_I(inode)->i_es_tree;
>
> - /* find delay extent in cache firstly */
> + /* find extent in cache firstly */
> + es->es_len = es->es_pblk = 0;
> if (tree->cache_es) {
> es1 = tree->cache_es;
> if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) {
> - es_debug("%u cached by [%u/%u)\n",
> - es->es_lblk, es1->es_lblk, es1->es_len);
> + es_debug("%u cached by [%u/%u) %llu %llx\n",
> + es->es_lblk, es1->es_lblk, es1->es_len,
> + ext4_es_pblock(es1), ext4_es_status(es1));
> goto out;
> }
> }
>
> - es->es_len = 0;
> es1 = __es_tree_search(&tree->root, es->es_lblk);
>
> out:
> @@ -267,6 +270,7 @@ out:
> tree->cache_es = es1;
> es->es_lblk = es1->es_lblk;
> es->es_len = es1->es_len;
> + es->es_pblk = es1->es_pblk;
> node = rb_next(&es1->rb_node);
> if (node) {
> es1 = rb_entry(node, struct extent_status, rb_node);
> @@ -281,7 +285,7 @@ out:
> }
>
> static struct extent_status *
> -ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len)
> +ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
> {
> struct extent_status *es;
> es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
> @@ -289,6 +293,7 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len)
> return NULL;
> es->es_lblk = lblk;
> es->es_len = len;
> + es->es_pblk = pblk;
> return es;
> }
>
> @@ -301,6 +306,8 @@ static void ext4_es_free_extent(struct extent_status *es)
> * Check whether or not two extents can be merged
> * Condition:
> * - logical block number is contiguous
> + * - physical block number is contiguous
> + * - status is equal
> */
> static int ext4_es_can_be_merged(struct extent_status *es1,
> struct extent_status *es2)
> @@ -308,6 +315,13 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
> if (es1->es_lblk + es1->es_len != es2->es_lblk)
> return 0;
>
> + if (ext4_es_status(es1) != ext4_es_status(es2))
> + return 0;
> +
> + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
> + (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
> + return 0;
> +
> return 1;
> }
>
> @@ -367,6 +381,10 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
> if (ext4_es_can_be_merged(newes, es)) {
> es->es_lblk = newes->es_lblk;
> es->es_len += newes->es_len;
> + if (ext4_es_is_written(es) ||
> + ext4_es_is_unwritten(es))
> + ext4_es_store_pblock(es,
> + newes->es_pblk);
> es = ext4_es_try_to_merge_left(tree, es);
> goto out;
> }
> @@ -384,7 +402,8 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
> }
> }
>
> - es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len);
> + es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len,
> + newes->es_pblk);
> if (!es)
> return -ENOMEM;
> rb_link_node(&es->rb_node, parent, p);
> @@ -404,21 +423,24 @@ out:
> * Return 0 on success, error code on failure.
> */
> int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
> - ext4_lblk_t len)
> + ext4_lblk_t len, ext4_fsblk_t pblk,
> + unsigned long long status)
> {
> struct ext4_es_tree *tree;
> struct extent_status newes;
> ext4_lblk_t end = lblk + len - 1;
> int err = 0;
>
> - trace_ext4_es_insert_extent(inode, lblk, len);
> - es_debug("add [%u/%u) to extent status tree of inode %lu\n",
> - lblk, len, inode->i_ino);
> + es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
> + lblk, len, pblk, status, inode->i_ino);
>
> BUG_ON(end < lblk);
>
> newes.es_lblk = lblk;
> newes.es_len = len;
> + ext4_es_store_pblock(&newes, pblk);
> + ext4_es_store_status(&newes, status);
> + trace_ext4_es_insert_extent(inode, &newes);
>
> write_lock(&EXT4_I(inode)->i_es_lock);
> tree = &EXT4_I(inode)->i_es_tree;
> @@ -442,6 +464,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
> struct extent_status *es;
> struct extent_status orig_es;
> ext4_lblk_t len1, len2;
> + ext4_fsblk_t block;
> int err = 0;
>
> es = __es_tree_search(&tree->root, lblk);
> @@ -455,6 +478,8 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>
> orig_es.es_lblk = es->es_lblk;
> orig_es.es_len = es->es_len;
> + orig_es.es_pblk = es->es_pblk;
> +
> len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
> len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
> if (len1 > 0)
> @@ -465,6 +490,13 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>
> newes.es_lblk = end + 1;
> newes.es_len = len2;
> + if (ext4_es_is_written(&orig_es) ||
> + ext4_es_is_unwritten(&orig_es)) {
> + block = ext4_es_pblock(&orig_es) +
> + orig_es.es_len - len2;
> + ext4_es_store_pblock(&newes, block);
> + }
> + ext4_es_store_status(&newes, ext4_es_status(&orig_es));
> err = __es_insert_extent(tree, &newes);
> if (err) {
> es->es_lblk = orig_es.es_lblk;
> @@ -474,6 +506,11 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
> } else {
> es->es_lblk = end + 1;
> es->es_len = len2;
> + if (ext4_es_is_written(es) ||
> + ext4_es_is_unwritten(es)) {
> + block = orig_es.es_pblk + orig_es.es_len - len2;
> + ext4_es_store_pblock(es, block);
> + }
> }
> goto out;
> }
> @@ -498,9 +535,15 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
> }
>
> if (es && es->es_lblk < end + 1) {
> + ext4_lblk_t orig_len = es->es_len;
> +
> len1 = ext4_es_end(es) - end;
> es->es_lblk = end + 1;
> es->es_len = len1;
> + if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
> + block = es->es_pblk + orig_len - len1;
> + ext4_es_store_pblock(es, block);
> + }
> }
>
> out:
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index 81e9339..2a5d69e 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -20,10 +20,21 @@
> #define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
> #endif
>
> +#define EXTENT_STATUS_WRITTEN 0x10000000 /* written extent */
> +#define EXTENT_STATUS_UNWRITTEN 0x20000000 /* unwritten extent */
> +#define EXTENT_STATUS_DELAYED 0x40000000 /* delayed extent */
> +#define EXTENT_STATUS_HOLE 0x80000000 /* hole */
> +
> +#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
> + EXTENT_STATUS_UNWRITTEN | \
> + EXTENT_STATUS_DELAYED | \
> + EXTENT_STATUS_HOLE)
> +
> struct extent_status {
> struct rb_node rb_node;
> ext4_lblk_t es_lblk; /* first logical block extent covers */
> ext4_lblk_t es_len; /* length of extent in block */
> + ext4_fsblk_t es_pblk; /* first physical block */
> };
>
> struct ext4_es_tree {
> @@ -36,10 +47,61 @@ extern void ext4_exit_es(void);
> extern void ext4_es_init_tree(struct ext4_es_tree *tree);
>
> extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
> - ext4_lblk_t len);
> + ext4_lblk_t len, ext4_fsblk_t pblk,
> + unsigned long long status);
> extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
> ext4_lblk_t len);
> extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
> struct extent_status *es);
>
> +static inline int ext4_es_is_written(struct extent_status *es)
> +{
> + return (es->es_pblk & EXTENT_STATUS_WRITTEN);
> +}
> +
> +static inline int ext4_es_is_unwritten(struct extent_status *es)
> +{
> + return (es->es_pblk & EXTENT_STATUS_UNWRITTEN);
> +}
> +
> +static inline int ext4_es_is_delayed(struct extent_status *es)
> +{
> + return (es->es_pblk & EXTENT_STATUS_DELAYED);
> +}
> +
> +static inline int ext4_es_is_hole(struct extent_status *es)
> +{
> + return (es->es_pblk & EXTENT_STATUS_HOLE);
> +}
> +
> +static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
> +{
> + return (es->es_pblk & EXTENT_STATUS_FLAGS);
> +}
> +
> +static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
> +{
> + return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
> +}
> +
> +static inline void ext4_es_store_pblock(struct extent_status *es,
> + ext4_fsblk_t pb)
> +{
> + ext4_fsblk_t block;
> +
> + block = (pb & ~EXTENT_STATUS_FLAGS) |
> + (es->es_pblk & EXTENT_STATUS_FLAGS);
> + es->es_pblk = block;
> +}
> +
> +static inline void ext4_es_store_status(struct extent_status *es,
> + unsigned long long status)
> +{
> + ext4_fsblk_t block;
> +
> + block = (status & EXTENT_STATUS_FLAGS) |
> + (es->es_pblk & ~EXTENT_STATUS_FLAGS);
> + es->es_pblk = block;
> +}
> +
> #endif /* _EXT4_EXTENTS_STATUS_H */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cbfe13b..7fb00d8 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1821,7 +1821,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
> goto out_unlock;
> }
>
> - retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
> + retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
> + ~0, EXTENT_STATUS_DELAYED);
> if (retval)
> goto out_unlock;
>
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index 952628a..ef2f96e 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2068,28 +2068,33 @@ TRACE_EVENT(ext4_ext_remove_space_done,
> );
>
> TRACE_EVENT(ext4_es_insert_extent,
> - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),
> + TP_PROTO(struct inode *inode, struct extent_status *es),
>
> - TP_ARGS(inode, lblk, len),
> + TP_ARGS(inode, es),
>
> TP_STRUCT__entry(
> - __field( dev_t, dev )
> - __field( ino_t, ino )
> - __field( loff_t, lblk )
> - __field( loff_t, len )
> + __field( dev_t, dev )
> + __field( ino_t, ino )
> + __field( ext4_lblk_t, lblk )
> + __field( ext4_lblk_t, len )
> + __field( ext4_fsblk_t, pblk )
> + __field( unsigned long long, status )
> ),
>
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->ino = inode->i_ino;
> - __entry->lblk = lblk;
> - __entry->len = len;
> + __entry->lblk = es->es_lblk;
> + __entry->len = es->es_len;
> + __entry->pblk = ext4_es_pblock(es);
> + __entry->status = ext4_es_status(es);
> ),
>
> - TP_printk("dev %d,%d ino %lu es [%lld/%lld)",
> + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx",
> MAJOR(__entry->dev), MINOR(__entry->dev),
> (unsigned long) __entry->ino,
> - __entry->lblk, __entry->len)
> + __entry->lblk, __entry->len,
> + __entry->pblk, __entry->status)
> );
>
> TRACE_EVENT(ext4_es_remove_extent,
> @@ -2150,6 +2155,8 @@ TRACE_EVENT(ext4_es_find_extent_exit,
> __field( ino_t, ino )
> __field( ext4_lblk_t, lblk )
> __field( ext4_lblk_t, len )
> + __field( ext4_fsblk_t, pblk )
> + __field( unsigned long long, status )
> __field( ext4_lblk_t, ret )
> ),
>
> @@ -2158,13 +2165,16 @@ TRACE_EVENT(ext4_es_find_extent_exit,
> __entry->ino = inode->i_ino;
> __entry->lblk = es->es_lblk;
> __entry->len = es->es_len;
> + __entry->pblk = ext4_es_pblock(es);
> + __entry->status = ext4_es_status(es);
> __entry->ret = ret;
> ),
>
> - TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u",
> + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u",
> MAJOR(__entry->dev), MINOR(__entry->dev),
> (unsigned long) __entry->ino,
> - __entry->lblk, __entry->len, __entry->ret)
> + __entry->lblk, __entry->len,
> + __entry->pblk, __entry->status, __entry->ret)
> );
>
> #endif /* _TRACE_EXT4_H */
> --
> 1.7.12.rc2.18.g61b472e
>
--
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists