lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220901154409.dcyvtknzzdjhuzas@quack3>
Date:   Thu, 1 Sep 2022 17:44:09 +0200
From:   Jan Kara <jack@...e.cz>
To:     Zhang Yi <yi.zhang@...wei.com>
Cc:     linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, cluster-devel@...hat.com,
        ntfs3@...ts.linux.dev, ocfs2-devel@....oracle.com,
        reiserfs-devel@...r.kernel.org, jack@...e.cz, tytso@....edu,
        akpm@...ux-foundation.org, axboe@...nel.dk,
        viro@...iv.linux.org.uk, rpeterso@...hat.com, agruenba@...hat.com,
        almaz.alexandrovich@...agon-software.com, mark@...heh.com,
        dushistov@...l.ru, hch@...radead.org, chengzhihao1@...wei.com,
        yukuai3@...wei.com
Subject: Re: [PATCH v2 02/14] fs/buffer: add some new buffer read helpers

On Thu 01-09-22 21:34:53, Zhang Yi wrote:
> Current ll_rw_block() helper is fragile because it assumes that locked
> buffer means it's under IO which is submitted by some other who holds
> the lock, it skip buffer if it failed to get the lock, so it's only
> safe on the readahead path. Unfortunately, now that most filesystems
> still use this helper mistakenly on the sync metadata read path. There
> is no guarantee that the one who holds the buffer lock always submit IO
> (e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484 ("blkdev:
> avoid migration stalls for blkdev pages"), it could lead to false
> positive -EIO when submitting reading IO.
> 
> This patch add some friendly buffer read helpers to prepare replacing
> ll_rw_block() and similar calls. We can only call bh_readahead_[]
> helpers for the readahead paths.
> 
> Signed-off-by: Zhang Yi <yi.zhang@...wei.com>

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara <jack@...e.cz>

								Honza

> ---
>  fs/buffer.c                 | 65 +++++++++++++++++++++++++++++++++++++
>  include/linux/buffer_head.h | 38 ++++++++++++++++++++++
>  2 files changed, 103 insertions(+)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index a0b70b3239f3..a6bc769e665d 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -3017,6 +3017,71 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
>  }
>  EXPORT_SYMBOL(bh_uptodate_or_lock);
>  
> +/**
> + * __bh_read - Submit read for a locked buffer
> + * @bh: struct buffer_head
> + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
> + * @wait: wait until reading finish
> + *
> + * Returns zero on success or don't wait, and -EIO on error.
> + */
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
> +{
> +	int ret = 0;
> +
> +	BUG_ON(!buffer_locked(bh));
> +
> +	get_bh(bh);
> +	bh->b_end_io = end_buffer_read_sync;
> +	submit_bh(REQ_OP_READ | op_flags, bh);
> +	if (wait) {
> +		wait_on_buffer(bh);
> +		if (!buffer_uptodate(bh))
> +			ret = -EIO;
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL(__bh_read);
> +
> +/**
> + * __bh_read_batch - Submit read for a batch of unlocked buffers
> + * @nr: entry number of the buffer batch
> + * @bhs: a batch of struct buffer_head
> + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
> + * @force_lock: force to get a lock on the buffer if set, otherwise drops any
> + *              buffer that cannot lock.
> + *
> + * Returns zero on success or don't wait, and -EIO on error.
> + */
> +void __bh_read_batch(int nr, struct buffer_head *bhs[],
> +		     blk_opf_t op_flags, bool force_lock)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr; i++) {
> +		struct buffer_head *bh = bhs[i];
> +
> +		if (buffer_uptodate(bh))
> +			continue;
> +
> +		if (force_lock)
> +			lock_buffer(bh);
> +		else
> +			if (!trylock_buffer(bh))
> +				continue;
> +
> +		if (buffer_uptodate(bh)) {
> +			unlock_buffer(bh);
> +			continue;
> +		}
> +
> +		bh->b_end_io = end_buffer_read_sync;
> +		get_bh(bh);
> +		submit_bh(REQ_OP_READ | op_flags, bh);
> +	}
> +}
> +EXPORT_SYMBOL(__bh_read_batch);
> +
>  /**
>   * bh_submit_read - Submit a locked buffer for reading
>   * @bh: struct buffer_head
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index c3863c417b00..6d09785bed9f 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -232,6 +232,9 @@ void write_boundary_block(struct block_device *bdev,
>  			sector_t bblock, unsigned blocksize);
>  int bh_uptodate_or_lock(struct buffer_head *bh);
>  int bh_submit_read(struct buffer_head *bh);
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
> +void __bh_read_batch(int nr, struct buffer_head *bhs[],
> +		     blk_opf_t op_flags, bool force_lock);
>  
>  extern int buffer_heads_over_limit;
>  
> @@ -399,6 +402,41 @@ static inline struct buffer_head *__getblk(struct block_device *bdev,
>  	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
>  }
>  
> +static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
> +		if (!buffer_uptodate(bh))
> +			__bh_read(bh, op_flags, false);
> +		else
> +			unlock_buffer(bh);
> +	}
> +}
> +
> +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	if (!bh_uptodate_or_lock(bh))
> +		__bh_read(bh, op_flags, false);
> +}
> +
> +/* Returns 1 if buffer uptodated, 0 on success, and -EIO on error. */
> +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	if (bh_uptodate_or_lock(bh))
> +		return 1;
> +	return __bh_read(bh, op_flags, true);
> +}
> +
> +static inline void bh_read_batch(int nr, struct buffer_head *bhs[])
> +{
> +	__bh_read_batch(nr, bhs, 0, true);
> +}
> +
> +static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[],
> +				      blk_opf_t op_flags)
> +{
> +	__bh_read_batch(nr, bhs, op_flags, false);
> +}
> +
>  /**
>   *  __bread() - reads a specified block and returns the bh
>   *  @bdev: the block_device to read from
> -- 
> 2.31.1
> 
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ