>From 2834bd2727c93055bb7373d8849492044f70c530 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jul 2011 22:01:51 +0200 Subject: [PATCH] vfs: Make sync(1) also write out block device inodes When a block device does not have a filesystem mounted on it, sync(1) just ignores it and does not write out its dirty pages, because sync iterates only over filesystems with s_bdi != noop_backing_dev_info and thus skips blockdev_superblock. Since it is unexpected that sync does not write out dirty data for block devices, be nice to users and change the behavior to do so. This requires a change to how syncing is done. We now first traverse all superblocks with s_bdi != noop_backing_dev_info, write out their inodes and call sync_fs; when this is done, we traverse all block devices and sync them. Signed-off-by: Jan Kara --- fs/sync.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 55 insertions(+), 15 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index c38ec16..f8f21d9 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -23,20 +23,13 @@ /* * Do the filesystem syncing work. For simple filesystems - * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to - * submit IO for these buffers via __sync_blockdev(). This also speeds up the - * wait == 1 case since in that case write_inode() functions do + * writeback_inodes_sb(sb) just dirties buffers with inodes so the caller has + * to additionally submit IO for these buffers via __sync_blockdev(). This also + * speeds up the wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. 
*/ -static int __sync_filesystem(struct super_block *sb, int wait) +static void __sync_filesystem(struct super_block *sb, int wait) { - /* - * This should be safe, as we require bdi backing to actually - * write out data in the first place - */ - if (sb->s_bdi == &noop_backing_dev_info) - return 0; - if (sb->s_qcop && sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, -1, wait); @@ -47,7 +40,6 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); - return __sync_blockdev(sb->s_bdev, wait); } /* @@ -71,16 +63,26 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0); + /* + * This should be safe, as we require bdi backing to actually + * write out data in the first place. + */ + if (sb->s_bdi == &noop_backing_dev_info) + return 0; + + __sync_filesystem(sb, 0); + ret = __sync_blockdev(sb->s_bdev, 0); if (ret < 0) return ret; - return __sync_filesystem(sb, 1); + __sync_filesystem(sb, 1); + return __sync_blockdev(sb->s_bdev, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_one_sb(struct super_block *sb, void *arg) { - if (!(sb->s_flags & MS_RDONLY)) + /* Avoid read-only filesystems and filesystems without backing device */ + if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi != &noop_backing_dev_info) __sync_filesystem(sb, *(int *)arg); } /* @@ -92,6 +94,42 @@ static void sync_filesystems(int wait) iterate_supers(sync_one_sb, &wait); } +static void sync_all_bdevs(int wait) +{ + struct inode *inode, *old_inode = NULL; + + spin_lock(&inode_sb_list_lock); + list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { + struct address_space *mapping = inode->i_mapping; + + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || + mapping->nrpages == 0) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); + /* + * We hold a 
reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the + * inode_sb_list_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it under + * inode_sb_list_lock. So we keep the reference and iput it + * later. + */ + iput(old_inode); + old_inode = inode; + + __sync_blockdev(I_BDEV(inode), wait); + + spin_lock(&inode_sb_list_lock); + } + spin_unlock(&inode_sb_list_lock); + iput(old_inode); +} + /* * sync everything. Start out by waking pdflush, because that writes back * all queues in parallel. @@ -101,6 +139,8 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0); sync_filesystems(0); sync_filesystems(1); + sync_all_bdevs(0); + sync_all_bdevs(1); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; -- 1.7.1