lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 11 Aug 2023 03:57:05 +0000
From:   Hui Zhu <teawaterz@...ux.alibaba.com>
To:     viro@...iv.linux.org.uk, brauner@...nel.org, tytso@....edu,
        adilger.kernel@...ger.ca, akpm@...ux-foundation.org, jack@...e.cz,
        willy@...radead.org, yi.zhang@...wei.com, hare@...e.de,
        p.raghav@...sung.com, ritesh.list@...il.com, mpatocka@...hat.com,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-ext4@...r.kernel.org
Cc:     teawater@...group.com, teawater@...il.com
Subject: [PATCH] ext4_sb_breadahead_unmovable: Change to be no-blocking

From: Hui Zhu <teawater@...group.com>

Encountered an issue where a large number of filesystem reads and writes
occurred suddenly within a container.  At the same time, other tasks on
the same host that were performing filesystem read and write operations
became blocked.  It was observed that many of the blocked tasks were
blocked on the ext4 journal lock. For example:
PID: 171453 TASK: ffff926566c9440 CPU: 54 COMMAND: "Thread"

Meanwhile, it was observed that the task holding the ext4 journal lock
was blocked for an extended period of time on "shrink_page_list" due to
"ext4_sb_breadahead_unmovable".

The function "grow_dev_page" increased the gfp mask with "__GFP_NOFAIL",
causing longer blocking times.
	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
	 * looping in the allocator rather than here, at least that
	 * code knows what it's doing.
	 */
	gfp_mask |= __GFP_NOFAIL;
However, "ext4_sb_breadahead_unmovable" is a prefetch function and
failures are acceptable.

Therefore, this commit changes "ext4_sb_breadahead_unmovable" to be
non-blocking, removing "__GFP_DIRECT_RECLAIM" from the gfp mask in the
"grow_dev_page" function if caller is ext4_sb_breadahead_unmovable to
alleviate memory-related blocking issues.

Signed-off-by: Hui Zhu <teawater@...group.com>
---
 fs/buffer.c                 | 41 +++++++++++++++++++++++--------------
 fs/ext4/super.c             |  2 +-
 include/linux/buffer_head.h | 17 ++++++++++-----
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..1086da366392 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1041,7 +1041,7 @@ static sector_t folio_init_buffers(struct folio *folio,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
-	      pgoff_t index, int size, int sizebits, gfp_t gfp)
+	      pgoff_t index, int size, int sizebits, gfp_t gfp, bool noblocking)
 {
 	struct inode *inode = bdev->bd_inode;
 	struct folio *folio;
@@ -1052,16 +1052,24 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 
 	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
 
-	/*
-	 * XXX: __getblk_slow() can not really deal with failure and
-	 * will endlessly loop on improvised global reclaim.  Prefer
-	 * looping in the allocator rather than here, at least that
-	 * code knows what it's doing.
-	 */
-	gfp_mask |= __GFP_NOFAIL;
+	if (noblocking)
+		gfp_mask &= ~__GFP_DIRECT_RECLAIM;
+	else {
+		/*
+		 * XXX: __getblk_slow() can not really deal with failure and
+		 * will endlessly loop on improvised global reclaim.  Prefer
+		 * looping in the allocator rather than here, at least that
+		 * code knows what it's doing.
+		 */
+		gfp_mask |= __GFP_NOFAIL;
+	}
 
 	folio = __filemap_get_folio(inode->i_mapping, index,
 			FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
+	if (IS_ERR(folio)) {
+		ret = PTR_ERR(folio);
+		goto out;
+	}
 
 	bh = folio_buffers(folio);
 	if (bh) {
@@ -1091,6 +1099,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 failed:
 	folio_unlock(folio);
 	folio_put(folio);
+out:
 	return ret;
 }
 
@@ -1099,7 +1108,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
  * that page was dirty, the buffers are set dirty also.
  */
 static int
-grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp,
+	     bool noblocking)
 {
 	pgoff_t index;
 	int sizebits;
@@ -1120,12 +1130,13 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 	}
 
 	/* Create a page with the proper size buffers.. */
-	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
+	return grow_dev_page(bdev, block, index, size, sizebits, gfp,
+			     noblocking);
 }
 
 static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block,
-	     unsigned size, gfp_t gfp)
+	      unsigned size, gfp_t gfp, bool noblocking)
 {
 	/* Size must be multiple of hard sectorsize */
 	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1147,7 +1158,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
 		if (bh)
 			return bh;
 
-		ret = grow_buffers(bdev, block, size, gfp);
+		ret = grow_buffers(bdev, block, size, gfp, noblocking);
 		if (ret < 0)
 			return NULL;
 	}
@@ -1436,13 +1447,13 @@ EXPORT_SYMBOL(__find_get_block);
  */
 struct buffer_head *
 __getblk_gfp(struct block_device *bdev, sector_t block,
-	     unsigned size, gfp_t gfp)
+	     unsigned size, gfp_t gfp, bool noblocking)
 {
 	struct buffer_head *bh = __find_get_block(bdev, block, size);
 
 	might_sleep();
 	if (bh == NULL)
-		bh = __getblk_slow(bdev, block, size, gfp);
+		bh = __getblk_slow(bdev, block, size, gfp, noblocking);
 	return bh;
 }
 EXPORT_SYMBOL(__getblk_gfp);
@@ -1476,7 +1487,7 @@ struct buffer_head *
 __bread_gfp(struct block_device *bdev, sector_t block,
 		   unsigned size, gfp_t gfp)
 {
-	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
+	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp, false);
 
 	if (likely(bh) && !buffer_uptodate(bh))
 		bh = __bread_slow(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c94ebf704616..d13b6f5c21eb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -254,7 +254,7 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
 
 void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
 {
-	struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
+	struct buffer_head *bh = sb_getblk_noblocking(sb, block);
 
 	if (likely(bh)) {
 		if (trylock_buffer(bh))
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6cb3e9af78c9..50cb0cc81962 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -234,7 +234,7 @@ wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
 			unsigned size);
 struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
-				  unsigned size, gfp_t gfp);
+				  unsigned size, gfp_t gfp, bool noblocking);
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
@@ -363,17 +363,24 @@ sb_breadahead(struct super_block *sb, sector_t block)
 	__breadahead(sb->s_bdev, block, sb->s_blocksize);
 }
 
+static inline struct buffer_head *
+sb_getblk_noblocking(struct super_block *sb, sector_t block)
+{
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, 0, true);
+}
+
 static inline struct buffer_head *
 sb_getblk(struct super_block *sb, sector_t block)
 {
-	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE,
+			    false);
 }
 
 
 static inline struct buffer_head *
 sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp)
 {
-	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp);
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp, false);
 }
 
 static inline struct buffer_head *
@@ -414,14 +421,14 @@ static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
 						   sector_t block,
 						   unsigned size)
 {
-	return __getblk_gfp(bdev, block, size, 0);
+	return __getblk_gfp(bdev, block, size, 0, false);
 }
 
 static inline struct buffer_head *__getblk(struct block_device *bdev,
 					   sector_t block,
 					   unsigned size)
 {
-	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
+	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE, false);
 }
 
 static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
-- 
2.19.1.6.gb485710b

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ