lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231211140753.975297-1-yukuai1@huaweicloud.com>
Date:   Mon, 11 Dec 2023 22:07:53 +0800
From:   Yu Kuai <yukuai1@...weicloud.com>
To:     axboe@...nel.dk, roger.pau@...rix.com, colyli@...e.de,
        kent.overstreet@...il.com, joern@...ybastard.org,
        miquel.raynal@...tlin.com, richard@....at, vigneshr@...com,
        sth@...ux.ibm.com, hoeppner@...ux.ibm.com, hca@...ux.ibm.com,
        gor@...ux.ibm.com, agordeev@...ux.ibm.com, jejb@...ux.ibm.com,
        martin.petersen@...cle.com, clm@...com, josef@...icpanda.com,
        dsterba@...e.com, viro@...iv.linux.org.uk, brauner@...nel.org,
        nico@...xnic.net, xiang@...nel.org, chao@...nel.org, tytso@....edu,
        adilger.kernel@...ger.ca, agruenba@...hat.com, jack@...e.com,
        konishi.ryusuke@...il.com, willy@...radead.org,
        akpm@...ux-foundation.org, p.raghav@...sung.com, hare@...e.de
Cc:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
        xen-devel@...ts.xenproject.org, linux-bcache@...r.kernel.org,
        linux-mtd@...ts.infradead.org, linux-s390@...r.kernel.org,
        linux-scsi@...r.kernel.org, linux-bcachefs@...r.kernel.org,
        linux-btrfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-erofs@...ts.ozlabs.org, linux-ext4@...r.kernel.org,
        gfs2@...ts.linux.dev, linux-nilfs@...r.kernel.org,
        yukuai3@...wei.com, yukuai1@...weicloud.com, yi.zhang@...wei.com,
        yangerkun@...wei.com
Subject: [PATCH RFC v2 for-6.8/block 15/18] buffer: add a new helper to read sb block

From: Yu Kuai <yukuai3@...wei.com>

Unlike __bread_gfp(), ext4 has special handing while reading sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
   block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch add a new helper __bread_gfp2() that will match above 2 and 3(
1 will be used, and 4 will still be encapsulated by ext4), and prepare to
prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
in ext4.

Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
 fs/buffer.c                 | 68 ++++++++++++++++++++++++++-----------
 include/linux/buffer_head.h | 18 +++++++++-
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..188bd36c9fea 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(__bforget);
 
-static struct buffer_head *__bread_slow(struct buffer_head *bh)
+static struct buffer_head *__bread_slow(struct buffer_head *bh,
+					blk_opf_t op_flags,
+					bool check_write_error)
 {
 	lock_buffer(bh);
-	if (buffer_uptodate(bh)) {
+	if (buffer_uptodate(bh) ||
+	    (check_write_error && buffer_uptodate_or_error(bh))) {
 		unlock_buffer(bh);
 		return bh;
 	} else {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(REQ_OP_READ, bh);
+		submit_bh(REQ_OP_READ | op_flags, bh);
 		wait_on_buffer(bh);
 		if (buffer_uptodate(bh))
 			return bh;
@@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 }
 EXPORT_SYMBOL(__breadahead);
 
+static struct buffer_head *
+bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+	  blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
+{
+	struct buffer_head *bh;
+
+	gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+	/*
+	 * Prefer looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp |= __GFP_NOFAIL;
+
+	bh = bdev_getblk(bdev, block, size, gfp);
+	if (unlikely(!bh))
+		return NULL;
+
+	if (buffer_uptodate(bh) ||
+	    (check_write_error && buffer_uptodate_or_error(bh)))
+		return bh;
+
+	return __bread_slow(bh, op_flags, check_write_error);
+}
+
 /**
  *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
@@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead);
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
-		   unsigned size, gfp_t gfp)
+__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+	    gfp_t gfp)
 {
-	struct buffer_head *bh;
-
-	gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
-
-	/*
-	 * Prefer looping in the allocator rather than here, at least that
-	 * code knows what it's doing.
-	 */
-	gfp |= __GFP_NOFAIL;
-
-	bh = bdev_getblk(bdev, block, size, gfp);
-
-	if (likely(bh) && !buffer_uptodate(bh))
-		bh = __bread_slow(bh);
-	return bh;
+	return bread_gfp(bdev, block, size, 0, gfp, false);
 }
 EXPORT_SYMBOL(__bread_gfp);
 
+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+	     blk_opf_t op_flags, gfp_t gfp)
+{
+	return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);
+
 static void __invalidate_bh_lrus(struct bh_lru *b)
 {
 	int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..751b2744b4ae 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
 	return test_bit_acquire(BH_Uptodate, &bh->b_state);
 }
 
+static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
+{
+	/*
+	 * If the buffer has the write error flag, data was failed to write
+	 * out in the block. In this case, set buffer uptodate to prevent
+	 * reading old data.
+	 */
+	if (buffer_write_io_error(bh))
+		set_buffer_uptodate(bh);
+	return buffer_uptodate(bh);
+}
+
 static inline unsigned long bh_offset(const struct buffer_head *bh)
 {
 	return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
@@ -231,7 +243,11 @@ void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
 struct buffer_head *__bread_gfp(struct block_device *,
-				sector_t block, unsigned size, gfp_t gfp);
+				sector_t block, unsigned int size, gfp_t gfp);
+struct buffer_head *__bread_gfp2(struct block_device *bdev, sector_t block,
+				 unsigned int size, blk_opf_t op_flags,
+				 gfp_t gfp);
+
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
 void unlock_buffer(struct buffer_head *bh);
-- 
2.39.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ