lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130208153900.GB10226@quack.suse.cz>
Date:	Fri, 8 Feb 2013 16:39:00 +0100
From:	Jan Kara <jack@...e.cz>
To:	Zheng Liu <gnehzuil.liu@...il.com>
Cc:	linux-ext4@...r.kernel.org, Zheng Liu <wenqing.lz@...bao.com>,
	Theodore Ts'o <tytso@....edu>, Jan kara <jack@...e.cz>
Subject: Re: [PATCH 02/10 v5] ext4: add physical block and status member
 into extent status tree

On Fri 08-02-13 16:43:58, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@...bao.com>
> 
> This commit adds two members in extent_status structure to let it record
> physical block and extent status.  Here es_pblk is used to record both
> of them because physical block only has 48 bits.  So extent status could
> be stashed into it so that we can save some memory.  Now written,
> unwritten, delayed and hole are defined as status.
> 
> Due to new member is added into extent status tree, all interfaces need
> to be adjusted.
  The patch looks good. You can add:
Reviewed-by: Jan Kara <jack@...e.cz>

								Honza
> 
> Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
> Cc: "Theodore Ts'o" <tytso@....edu>
> Cc: Jan kara <jack@...e.cz>
> ---
>  fs/ext4/extents_status.c    | 67 +++++++++++++++++++++++++++++++++++++--------
>  fs/ext4/extents_status.h    | 64 ++++++++++++++++++++++++++++++++++++++++++-
>  fs/ext4/inode.c             |  3 +-
>  include/trace/events/ext4.h | 34 +++++++++++++++--------
>  4 files changed, 142 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index aa4d346..5093cee 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -179,7 +179,9 @@ static void ext4_es_print_tree(struct inode *inode)
>  	while (node) {
>  		struct extent_status *es;
>  		es = rb_entry(node, struct extent_status, rb_node);
> -		printk(KERN_DEBUG " [%u/%u)", es->es_lblk, es->es_len);
> +		printk(KERN_DEBUG " [%u/%u) %llu %llx",
> +		       es->es_lblk, es->es_len,
> +		       ext4_es_pblock(es), ext4_es_status(es));
>  		node = rb_next(node);
>  	}
>  	printk(KERN_DEBUG "\n");
> @@ -234,7 +236,7 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
>   * @es: delayed extent that we found
>   *
>   * Returns the first block of the next extent after es, otherwise
> - * EXT_MAX_BLOCKS if no delay extent is found.
> + * EXT_MAX_BLOCKS if no extent is found.
>   * Delayed extent is returned via @es.
>   */
>  ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
> @@ -249,17 +251,18 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
>  	read_lock(&EXT4_I(inode)->i_es_lock);
>  	tree = &EXT4_I(inode)->i_es_tree;
>  
> -	/* find delay extent in cache firstly */
> +	/* find extent in cache firstly */
> +	es->es_len = es->es_pblk = 0;
>  	if (tree->cache_es) {
>  		es1 = tree->cache_es;
>  		if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) {
> -			es_debug("%u cached by [%u/%u)\n",
> -				 es->es_lblk, es1->es_lblk, es1->es_len);
> +			es_debug("%u cached by [%u/%u) %llu %llx\n",
> +				 es->es_lblk, es1->es_lblk, es1->es_len,
> +				 ext4_es_pblock(es1), ext4_es_status(es1));
>  			goto out;
>  		}
>  	}
>  
> -	es->es_len = 0;
>  	es1 = __es_tree_search(&tree->root, es->es_lblk);
>  
>  out:
> @@ -267,6 +270,7 @@ out:
>  		tree->cache_es = es1;
>  		es->es_lblk = es1->es_lblk;
>  		es->es_len = es1->es_len;
> +		es->es_pblk = es1->es_pblk;
>  		node = rb_next(&es1->rb_node);
>  		if (node) {
>  			es1 = rb_entry(node, struct extent_status, rb_node);
> @@ -281,7 +285,7 @@ out:
>  }
>  
>  static struct extent_status *
> -ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len)
> +ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
>  {
>  	struct extent_status *es;
>  	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
> @@ -289,6 +293,7 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len)
>  		return NULL;
>  	es->es_lblk = lblk;
>  	es->es_len = len;
> +	es->es_pblk = pblk;
>  	return es;
>  }
>  
> @@ -301,6 +306,8 @@ static void ext4_es_free_extent(struct extent_status *es)
>   * Check whether or not two extents can be merged
>   * Condition:
>   *  - logical block number is contiguous
> + *  - physical block number is contiguous
> + *  - status is equal
>   */
>  static int ext4_es_can_be_merged(struct extent_status *es1,
>  				 struct extent_status *es2)
> @@ -308,6 +315,13 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
>  	if (es1->es_lblk + es1->es_len != es2->es_lblk)
>  		return 0;
>  
> +	if (ext4_es_status(es1) != ext4_es_status(es2))
> +		return 0;
> +
> +	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
> +	    (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
> +		return 0;
> +
>  	return 1;
>  }
>  
> @@ -367,6 +381,10 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
>  			if (ext4_es_can_be_merged(newes, es)) {
>  				es->es_lblk = newes->es_lblk;
>  				es->es_len += newes->es_len;
> +				if (ext4_es_is_written(es) ||
> +				    ext4_es_is_unwritten(es))
> +					ext4_es_store_pblock(es,
> +							     newes->es_pblk);
>  				es = ext4_es_try_to_merge_left(tree, es);
>  				goto out;
>  			}
> @@ -384,7 +402,8 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
>  		}
>  	}
>  
> -	es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len);
> +	es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len,
> +				  newes->es_pblk);
>  	if (!es)
>  		return -ENOMEM;
>  	rb_link_node(&es->rb_node, parent, p);
> @@ -404,21 +423,24 @@ out:
>   * Return 0 on success, error code on failure.
>   */
>  int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
> -			  ext4_lblk_t len)
> +			  ext4_lblk_t len, ext4_fsblk_t pblk,
> +			  unsigned long long status)
>  {
>  	struct ext4_es_tree *tree;
>  	struct extent_status newes;
>  	ext4_lblk_t end = lblk + len - 1;
>  	int err = 0;
>  
> -	trace_ext4_es_insert_extent(inode, lblk, len);
> -	es_debug("add [%u/%u) to extent status tree of inode %lu\n",
> -		 lblk, len, inode->i_ino);
> +	es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
> +		 lblk, len, pblk, status, inode->i_ino);
>  
>  	BUG_ON(end < lblk);
>  
>  	newes.es_lblk = lblk;
>  	newes.es_len = len;
> +	ext4_es_store_pblock(&newes, pblk);
> +	ext4_es_store_status(&newes, status);
> +	trace_ext4_es_insert_extent(inode, &newes);
>  
>  	write_lock(&EXT4_I(inode)->i_es_lock);
>  	tree = &EXT4_I(inode)->i_es_tree;
> @@ -442,6 +464,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>  	struct extent_status *es;
>  	struct extent_status orig_es;
>  	ext4_lblk_t len1, len2;
> +	ext4_fsblk_t block;
>  	int err = 0;
>  
>  	es = __es_tree_search(&tree->root, lblk);
> @@ -455,6 +478,8 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>  
>  	orig_es.es_lblk = es->es_lblk;
>  	orig_es.es_len = es->es_len;
> +	orig_es.es_pblk = es->es_pblk;
> +
>  	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
>  	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
>  	if (len1 > 0)
> @@ -465,6 +490,13 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>  
>  			newes.es_lblk = end + 1;
>  			newes.es_len = len2;
> +			if (ext4_es_is_written(&orig_es) ||
> +			    ext4_es_is_unwritten(&orig_es)) {
> +				block = ext4_es_pblock(&orig_es) +
> +					orig_es.es_len - len2;
> +				ext4_es_store_pblock(&newes, block);
> +			}
> +			ext4_es_store_status(&newes, ext4_es_status(&orig_es));
>  			err = __es_insert_extent(tree, &newes);
>  			if (err) {
>  				es->es_lblk = orig_es.es_lblk;
> @@ -474,6 +506,11 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>  		} else {
>  			es->es_lblk = end + 1;
>  			es->es_len = len2;
> +			if (ext4_es_is_written(es) ||
> +			    ext4_es_is_unwritten(es)) {
> +				block = orig_es.es_pblk + orig_es.es_len - len2;
> +				ext4_es_store_pblock(es, block);
> +			}
>  		}
>  		goto out;
>  	}
> @@ -498,9 +535,15 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
>  	}
>  
>  	if (es && es->es_lblk < end + 1) {
> +		ext4_lblk_t orig_len = es->es_len;
> +
>  		len1 = ext4_es_end(es) - end;
>  		es->es_lblk = end + 1;
>  		es->es_len = len1;
> +		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
> +			block = es->es_pblk + orig_len - len1;
> +			ext4_es_store_pblock(es, block);
> +		}
>  	}
>  
>  out:
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index 81e9339..2a5d69e 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -20,10 +20,21 @@
>  #define es_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
>  #endif
>  
> +#define EXTENT_STATUS_WRITTEN	0x10000000	/* written extent */
> +#define EXTENT_STATUS_UNWRITTEN	0x20000000	/* unwritten extent */
> +#define EXTENT_STATUS_DELAYED	0x40000000	/* delayed extent */
> +#define EXTENT_STATUS_HOLE	0x80000000	/* hole */
> +
> +#define EXTENT_STATUS_FLAGS	(EXTENT_STATUS_WRITTEN | \
> +				 EXTENT_STATUS_UNWRITTEN | \
> +				 EXTENT_STATUS_DELAYED | \
> +				 EXTENT_STATUS_HOLE)
> +
>  struct extent_status {
>  	struct rb_node rb_node;
>  	ext4_lblk_t es_lblk;	/* first logical block extent covers */
>  	ext4_lblk_t es_len;	/* length of extent in block */
> +	ext4_fsblk_t es_pblk;	/* first physical block */
>  };
>  
>  struct ext4_es_tree {
> @@ -36,10 +47,61 @@ extern void ext4_exit_es(void);
>  extern void ext4_es_init_tree(struct ext4_es_tree *tree);
>  
>  extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
> -				 ext4_lblk_t len);
> +				 ext4_lblk_t len, ext4_fsblk_t pblk,
> +				 unsigned long long status);
>  extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
>  				 ext4_lblk_t len);
>  extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
>  				struct extent_status *es);
>  
> +static inline int ext4_es_is_written(struct extent_status *es)
> +{
> +	return (es->es_pblk & EXTENT_STATUS_WRITTEN);
> +}
> +
> +static inline int ext4_es_is_unwritten(struct extent_status *es)
> +{
> +	return (es->es_pblk & EXTENT_STATUS_UNWRITTEN);
> +}
> +
> +static inline int ext4_es_is_delayed(struct extent_status *es)
> +{
> +	return (es->es_pblk & EXTENT_STATUS_DELAYED);
> +}
> +
> +static inline int ext4_es_is_hole(struct extent_status *es)
> +{
> +	return (es->es_pblk & EXTENT_STATUS_HOLE);
> +}
> +
> +static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
> +{
> +	return (es->es_pblk & EXTENT_STATUS_FLAGS);
> +}
> +
> +static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
> +{
> +	return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
> +}
> +
> +static inline void ext4_es_store_pblock(struct extent_status *es,
> +					ext4_fsblk_t pb)
> +{
> +	ext4_fsblk_t block;
> +
> +	block = (pb & ~EXTENT_STATUS_FLAGS) |
> +		(es->es_pblk & EXTENT_STATUS_FLAGS);
> +	es->es_pblk = block;
> +}
> +
> +static inline void ext4_es_store_status(struct extent_status *es,
> +					unsigned long long status)
> +{
> +	ext4_fsblk_t block;
> +
> +	block = (status & EXTENT_STATUS_FLAGS) |
> +		(es->es_pblk & ~EXTENT_STATUS_FLAGS);
> +	es->es_pblk = block;
> +}
> +
>  #endif /* _EXT4_EXTENTS_STATUS_H */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cbfe13b..7fb00d8 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1821,7 +1821,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
>  				goto out_unlock;
>  		}
>  
> -		retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len);
> +		retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
> +					       ~0, EXTENT_STATUS_DELAYED);
>  		if (retval)
>  			goto out_unlock;
>  
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index 952628a..ef2f96e 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2068,28 +2068,33 @@ TRACE_EVENT(ext4_ext_remove_space_done,
>  );
>  
>  TRACE_EVENT(ext4_es_insert_extent,
> -	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),
> +	TP_PROTO(struct inode *inode, struct extent_status *es),
>  
> -	TP_ARGS(inode, lblk, len),
> +	TP_ARGS(inode, es),
>  
>  	TP_STRUCT__entry(
> -		__field(	dev_t,	dev			)
> -		__field(	ino_t,	ino			)
> -		__field(	loff_t,	lblk			)
> -		__field(	loff_t, len			)
> +		__field(	dev_t,		dev		)
> +		__field(	ino_t,		ino		)
> +		__field(	ext4_lblk_t,	lblk		)
> +		__field(	ext4_lblk_t,	len		)
> +		__field(	ext4_fsblk_t,	pblk		)
> +		__field(	unsigned long long, status	)
>  	),
>  
>  	TP_fast_assign(
>  		__entry->dev	= inode->i_sb->s_dev;
>  		__entry->ino	= inode->i_ino;
> -		__entry->lblk	= lblk;
> -		__entry->len	= len;
> +		__entry->lblk	= es->es_lblk;
> +		__entry->len	= es->es_len;
> +		__entry->pblk	= ext4_es_pblock(es);
> +		__entry->status	= ext4_es_status(es);
>  	),
>  
> -	TP_printk("dev %d,%d ino %lu es [%lld/%lld)",
> +	TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long) __entry->ino,
> -		  __entry->lblk, __entry->len)
> +		  __entry->lblk, __entry->len,
> +		  __entry->pblk, __entry->status)
>  );
>  
>  TRACE_EVENT(ext4_es_remove_extent,
> @@ -2150,6 +2155,8 @@ TRACE_EVENT(ext4_es_find_extent_exit,
>  		__field(	ino_t,		ino		)
>  		__field(	ext4_lblk_t,	lblk		)
>  		__field(	ext4_lblk_t,	len		)
> +		__field(	ext4_fsblk_t,	pblk		)
> +		__field(	unsigned long long, status	)
>  		__field(	ext4_lblk_t,	ret		)
>  	),
>  
> @@ -2158,13 +2165,16 @@ TRACE_EVENT(ext4_es_find_extent_exit,
>  		__entry->ino	= inode->i_ino;
>  		__entry->lblk	= es->es_lblk;
>  		__entry->len	= es->es_len;
> +		__entry->pblk	= ext4_es_pblock(es);
> +		__entry->status	= ext4_es_status(es);
>  		__entry->ret	= ret;
>  	),
>  
> -	TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u",
> +	TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long) __entry->ino,
> -		  __entry->lblk, __entry->len, __entry->ret)
> +		  __entry->lblk, __entry->len,
> +		  __entry->pblk, __entry->status, __entry->ret)
>  );
>  
>  #endif /* _TRACE_EXT4_H */
> -- 
> 1.7.12.rc2.18.g61b472e
> 
-- 
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ