From 615ee076495ca9264ef09040f56eae54004e4c26 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Apr 2009 14:24:01 +0200 Subject: [PATCH] vfs: Write inodes reliably before calling ->write_super() and ->sync_fs() So far, __fsync_super() and do_sync() called sync_inodes(sb, 0), then called ->write_super(), ->sync_fs() and after that called sync_inodes(sb, 1). This ordering makes it kind of hard for filesystems, as sync_inodes(sb, 0) need not submit all the IO (for example, it skips inodes with I_SYNC set), so e.g. forcing a transaction to disk in ->sync_fs() is not really enough. Yes, this means sys_sync has not been completely reliable on some filesystems (ext3, ext4, reiserfs, ocfs2 and others are hit by this) when racing e.g. with background writeback. A similar problem also hits other filesystems (e.g. ext2) because of write_super() being called before sync_inodes(sb, 1). Change the ordering so that inodes are first reliably written out and write_super() and sync_fs() are called only after that. 
Signed-off-by: Jan Kara --- fs/super.c | 2 +- fs/sync.c | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/super.c b/fs/super.c index 786fe7d..4c12918 100644 --- a/fs/super.c +++ b/fs/super.c @@ -267,6 +267,7 @@ void __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); vfs_dq_sync(sb); + sync_inodes_sb(sb, 1); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); @@ -274,7 +275,6 @@ void __fsync_super(struct super_block *sb) if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); } /* diff --git a/fs/sync.c b/fs/sync.c index 7abc65f..4de4c89 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -26,10 +26,15 @@ static void do_sync(unsigned long wait) wakeup_pdflush(0); sync_inodes(0); /* All mappings, inodes and their blockdevs */ vfs_dq_sync(NULL); - sync_supers(); /* Write the superblocks */ sync_filesystems(0); /* Start syncing the filesystems */ + /* + * We have to reliably submit IO for all the inodes before writing + * super blocks and calling sync_fs(). Otherwise superblock could miss + * some updates or journal could still have uncommitted data. + */ + sync_inodes(wait); + sync_supers(); /* Write the superblocks */ sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ if (!wait) printk("Emergency Sync complete\n"); if (unlikely(laptop_mode)) -- 1.6.0.2