lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6933ee6c.a70a0220.38f243.001c.GAE@google.com>
Date: Sat, 06 Dec 2025 00:50:52 -0800
From: syzbot <syzbot+4235e4d7b6fd75704528@...kaller.appspotmail.com>
To: linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com
Subject: Forwarded: [PATCH] f2fs: fix hung task in block_operations during checkpoint

For archival purposes, forwarding an incoming command email to
linux-kernel@...r.kernel.org, syzkaller-bugs@...glegroups.com.

***

Subject: [PATCH] f2fs: fix hung task in block_operations during checkpoint
Author: kartikey406@...il.com

#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master

        f2fs_sync_inode_meta() can return 0 (success) even when
        f2fs_update_inode_page() fails and triggers f2fs_stop_checkpoint().
        This happens because the error flag check only occurs at the start
        of each loop iteration, not after f2fs_update_inode_page() returns.

        When I/O errors occur:
        1. f2fs_update_inode_page() retries 8 times then calls
           f2fs_stop_checkpoint(), which sets CP_ERROR_FLAG
        2. f2fs_sync_inode_meta() returns 0 without checking the error flag
        3. block_operations() sees success and loops back to retry_flush_quotas
        4. Dirty inodes remain on the list (sync failed), so the loop
           repeats forever
        5. The checkpoint never completes, and waiters block indefinitely

        This causes hung tasks when operations like unlink wait for checkpoint
        completion while holding locks that other tasks need.

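        Fix by checking f2fs_cp_error() after processing each inode in
        f2fs_sync_inode_meta() to detect errors from f2fs_update_inode_page().

        Note: the diff below does not contain that check yet; it only adds
        debug printk() calls to block_operations() and f2fs_write_checkpoint()
        to trace the checkpoint path for this syzbot test run.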

        Reported-by: syzbot+4235e4d7b6fd75704528@...kaller.appspotmail.com
        Closes: https://syzkaller.appspot.com/bug?extid=4235e4d7b6fd75704528
        Signed-off-by: Deepanshu Kartikey <kartikey406@...il.com>
---
 fs/f2fs/checkpoint.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bbe07e3a6c75..efe72e517955 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1216,8 +1216,9 @@ static int block_operations(struct f2fs_sb_info *sbi)
 	 * Let's flush inline_data in dirty node pages.
 	 */
 	f2fs_flush_inline_data(sbi);
-
+	printk("f2fs: block_ops enter\n");
 retry_flush_quotas:
+	printk("f2fs: block_ops retry_flush_quotas cnt=%d\n", cnt);
 	f2fs_lock_all(sbi);
 	if (__need_flush_quota(sbi)) {
 		bool need_lock = sbi->umount_lock_holder != current;
@@ -1242,6 +1243,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
 
 retry_flush_dents:
 	/* write all the dirty dentry pages */
+	printk("f2fs: block_ops retry_flush_dents dirty_dents=%lld\n", 
+	       get_pages(sbi, F2FS_DIRTY_DENTS));
 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 		f2fs_unlock_all(sbi);
 		err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true);
@@ -1256,7 +1259,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
 	 * until finishing nat/sit flush. inode->i_blocks can be updated.
 	 */
 	f2fs_down_write(&sbi->node_change);
-
+	printk("f2fs: block_ops check DIRTY_IMETA=%lld\n",
+	       get_pages(sbi, F2FS_DIRTY_IMETA));
 	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
 		f2fs_up_write(&sbi->node_change);
 		f2fs_unlock_all(sbi);
@@ -1268,6 +1272,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
 	}
 
 retry_flush_nodes:
+	printk("f2fs: block_ops retry_flush_nodes dirty_nodes=%lld\n",
+	       get_pages(sbi, F2FS_DIRTY_NODES));
 	f2fs_down_write(&sbi->node_write);
 
 	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
@@ -1290,6 +1296,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
 	 */
 	__prepare_cp_block(sbi);
 	f2fs_up_write(&sbi->node_change);
+	printk("f2fs: block_ops done\n");
 	return err;
 }
 
@@ -1659,9 +1666,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			return 0;
 		f2fs_warn(sbi, "Start checkpoint disabled!");
 	}
+	printk("f2fs_cp: 1 before cp_global_sem\n");
 	if (cpc->reason != CP_RESIZE)
 		f2fs_down_write(&sbi->cp_global_sem);
-
+	 printk("f2fs_cp: 2 after cp_global_sem\n");
 	stat_cp_time(cpc, CP_TIME_LOCK);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1669,16 +1677,18 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
 		goto out;
 	if (unlikely(f2fs_cp_error(sbi))) {
+		printk("f2fs_cp: 3 cp_error detected early\n");
 		err = -EIO;
 		goto out;
 	}
 
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
-
+	printk("f2fs_cp: 4 before block_operations\n");
 	err = block_operations(sbi);
+	printk("f2fs_cp: 5 after block_operations err=%d cp_error=%d\n", err, f2fs_cp_error(sbi));
 	if (err)
 		goto out;
-
+	//printk("f2fs_cp: 6 before do_checkpoint\n");
 	stat_cp_time(cpc, CP_TIME_OP_LOCK);
 
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
@@ -1724,8 +1734,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* save inmem log status */
 	f2fs_save_inmem_curseg(sbi);
+	printk("f2fs_cp: 4 before block_operations\n");
 
 	err = do_checkpoint(sbi, cpc);
+	printk("f2fs_cp: 7 after do_checkpoint err=%d\n", err);
 	if (err) {
 		f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
 		f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ