lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <705A22C6-4583-47D4-8FB0-B2820F7051F3@dilger.ca>
Date:   Fri, 14 Jul 2017 14:13:16 -0700
From:   Andreas Dilger <adilger@...ger.ca>
To:     Tahsin Erdogan <tahsin@...gle.com>
Cc:     Theodore Ts'o <tytso@....edu>, linux-ext4@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] ext4: make xattr inode reads faster

On Jul 12, 2017, at 1:29 AM, Tahsin Erdogan <tahsin@...gle.com> wrote:
> 
> ext4_xattr_inode_read() currently reads each block sequentially while
> waiting for io operation to complete before moving on to the next
> block. This prevents request merging in block layer.
> 
> Fix this by starting reads for all blocks then wait for completions.

It surprises me that ext4/VFS doesn't already have a helper routine to do this.
It looks like ext4_find_entry() is doing something similar for read-ahead of
directory blocks, so it may be worthwhile to consider moving that code over
to use ext4_bread_batch(), though ext4_bread_batch() would then need a flag
for whether to wait on the buffers to be uptodate or not.

> Signed-off-by: Tahsin Erdogan <tahsin@...gle.com>
> ---
> fs/ext4/ext4.h  |  2 ++
> fs/ext4/inode.c | 36 ++++++++++++++++++++++++++++++++++++
> fs/ext4/xattr.c | 50 +++++++++++++++++++++++++++++++-------------------
> 3 files changed, 69 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 9ebde0cd632e..12f0a16ad500 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
> int ext4_inode_is_fast_symlink(struct inode *inode);
> struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
> struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +		     struct buffer_head **bhs);
> int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
> 			     struct buffer_head *bh_result, int create);
> int ext4_get_block(struct inode *inode, sector_t iblock,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3c600f02673f..5b8ae1b66f09 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1015,6 +1015,42 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
> 	return ERR_PTR(-EIO);
> }
> 
> +/* Read a contiguous batch of blocks. */
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +		     struct buffer_head **bhs)
> +{
> +	int i, err;
> +
> +	for (i = 0; i < bh_count; i++) {
> +		bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);


It's nice to have the parameter annotation, since it can often be confusing if
there are multiple int/bool arguments to a function, but I'd write this as:

	bhs[i] = ext4_getblk(NULL, inode, block + i, /* map_flags = */ 0);

I don't know whether there is an established convention for this kind of comment.

> +		if (IS_ERR(bhs[i])) {
> +			err = PTR_ERR(bhs[i]);
> +			while (i--)
> +				brelse(bhs[i]);

It would be better to do the error cleanup once at the end of the function,
instead of multiple times in the code, like:

		if (IS_ERR(bhs[i])) {
			err = PTR_ERR(bhs[i]);
			bh_count = i;
			goto out_brelse;
		}
	:
	:
out_brelse:
	for (i = 0; i < bh_count; i++) {
		brelse(bhs[i]);
		bhs[i] = NULL;
	}
	return err;


The "bhs[i] = NULL" isn't strictly necessary, but it is better not to leave
stale pointers around in this array.

> +			return err;
> +		}
> +	}
> +
> +	for (i = 0; i < bh_count; i++)
> +		/* Note that NULL bhs[i] is valid because of holes. */
> +		if (bhs[i] && !buffer_uptodate(bhs[i]))
> +			ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
> +				    &bhs[i]);
> +
> +	for (i = 0; i < bh_count; i++)
> +		if (bhs[i])
> +			wait_on_buffer(bhs[i]);
> +
> +	for (i = 0; i < bh_count; i++) {
> +		if (bhs[i] && !buffer_uptodate(bhs[i])) {
> +			for (i = 0; i < bh_count; i++)
> +				brelse(bhs[i]);

The "out_brelse:" label could also be used here (after setting err = -EIO).

> +			return -EIO;
> +		}
> +	}
> +	return 0;
> +}
> +
> int ext4_walk_page_buffers(handle_t *handle,
> 			   struct buffer_head *head,
> 			   unsigned from,
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index cff4f41ced61..f7364a842ff4 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -317,28 +317,40 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
>  */
> static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
> {
> -	unsigned long block = 0;
> -	struct buffer_head *bh;
> -	int blocksize = ea_inode->i_sb->s_blocksize;
> -	size_t csize, copied = 0;
> -	void *copy_pos = buf;
> -
> -	while (copied < size) {
> -		csize = (size - copied) > blocksize ? blocksize : size - copied;
> -		bh = ext4_bread(NULL, ea_inode, block, 0);
> -		if (IS_ERR(bh))
> -			return PTR_ERR(bh);
> -		if (!bh)
> -			return -EFSCORRUPTED;
> +	int blocksize = 1 << ea_inode->i_blkbits;
> +	int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
> +	int tail_size = (size % blocksize) ?: blocksize;
> +	struct buffer_head *bhs_inline[8];

> +	struct buffer_head **bhs = bhs_inline;
> +	int i, ret;
> +
> +	if (bh_count > ARRAY_SIZE(bhs_inline)) {
> +		bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
> +		if (!bhs)
> +			return -ENOMEM;
> +	}
> 
> -		memcpy(copy_pos, bh->b_data, csize);
> -		brelse(bh);
> +	ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, bhs);


	ret = ext4_bread_batch(ea_inode, /* block = */ 0, bh_count, bhs);  ?

> +	if (ret)
> +		goto free_bhs;
> 
> -		copy_pos += csize;
> -		block += 1;
> -		copied += csize;
> +	for (i = 0; i < bh_count; i++) {
> +		/* There shouldn't be any holes in ea_inode. */
> +		if (!bhs[i]) {
> +			ret = -EFSCORRUPTED;
> +			goto put_bhs;
> +		}
> +		memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
> +		       i < bh_count - 1 ? blocksize : tail_size);
> 	}
> -	return 0;
> +	ret = 0;
> +put_bhs:
> +	for (i = 0; i < bh_count; i++)
> +		brelse(bhs[i]);
> +free_bhs:
> +	if (bhs != bhs_inline)
> +		kfree(bhs);
> +	return ret;
> }
> 
> static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> --
> 2.13.2.932.g7449e964c-goog
> 


Cheers, Andreas






Download attachment "signature.asc" of type "application/pgp-signature" (196 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ