From: Hans Reiser This patch adds a new operation to struct super_operations - sync_inodes - together with a generic implementation, and changes fs-writeback.c:sync_sb_inodes() to call the filesystem's sync_inodes if it is defined, or the generic implementation otherwise. This new operation allows a filesystem to decide for itself what to flush. Reiser4 flushes dirty pages on the basis of atoms, not inodes. sync_sb_inodes used to call the address space flushing method (writepages) for every dirty inode. For reiser4 this meant having to commit atoms unnecessarily often, which caused a substantial slowdown. Having this method helped to fix that problem. Also, make generic_sync_sb_inodes take inode_lock itself. It helps reiser4 to get rid of some oddities. sync_sb_inodes is always called like: spin_lock(&inode_lock); sync_sb_inodes(sb, wbc); spin_unlock(&inode_lock); This patch moves spin_lock/spin_unlock down into generic_sync_sb_inodes. [deweerdt@free.fr: lockdep: unbalance at generic_sync_sb_inodes] Signed-off-by: Frederik Deweerdt Signed-off-by: Andrew Morton --- fs/fs-writeback.c | 31 ++++++++++++++++--------------- include/linux/fs.h | 3 +++ 2 files changed, 19 insertions(+), 15 deletions(-) --- linux-2.6.23-rc2-mm2.orig/fs/fs-writeback.c +++ linux-2.6.23-rc2-mm2/fs/fs-writeback.c @@ -375,8 +375,6 @@ __writeback_single_inode(struct inode *i * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so * that it can be located for waiting on in __writeback_single_inode(). * - * Called under inode_lock. - * * If `bdi' is non-zero then we're being asked to writeback a specific queue. * This function assumes that the blockdev superblock's inodes are backed by * a variety of queues, so all inodes are searched. For other superblocks, @@ -392,11 +390,13 @@ __writeback_single_inode(struct inode *i * on the writer throttling path, and we get decent balancing between many * throttled threads: we don't want them all piling up on inode_sync_wait. 
*/ -static int -sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) +int generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc) { int ret = 0; + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&sb->s_io)) queue_io(sb, wbc->older_than_this); @@ -474,9 +474,18 @@ sync_sb_inodes(struct super_block *sb, s if (list_empty(&sb->s_io)) list_splice_init(&sb->s_more_io, &sb->s_io); - + spin_unlock(&inode_lock); return ret; /* Leave any unwritten inodes on s_io */ } +EXPORT_SYMBOL(generic_sync_sb_inodes); + +static int sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) +{ + if (sb->s_op->sync_inodes) + return sb->s_op->sync_inodes(sb, wbc); + else + return generic_sync_sb_inodes(sb, wbc); +} /* * Start writeback of dirty pagecache data against all unlocked inodes. @@ -518,11 +527,7 @@ restart: */ if (down_read_trylock(&sb->s_umount)) { if (sb->s_root) { - int err; - - spin_lock(&inode_lock); - err = sync_sb_inodes(sb, wbc); - spin_unlock(&inode_lock); + int err = sync_sb_inodes(sb, wbc); if (!ret) ret = err; } @@ -559,16 +564,12 @@ int sync_inodes_sb(struct super_block *s }; unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - int ret; wbc.nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + nr_dirty + nr_unstable; wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ - spin_lock(&inode_lock); - ret = sync_sb_inodes(sb, &wbc); - spin_unlock(&inode_lock); - return ret; + return sync_sb_inodes(sb, &wbc); } /* --- linux-2.6.23-rc2-mm2.orig/include/linux/fs.h +++ linux-2.6.23-rc2-mm2/include/linux/fs.h @@ -1260,6 +1260,8 @@ struct super_operations { void (*clear_inode) (struct inode *); void (*umount_begin) (struct vfsmount *, int); + int (*sync_inodes) (struct super_block *sb, + struct writeback_control *wbc); int (*show_options)(struct seq_file *, struct vfsmount *); int 
(*show_stats)(struct seq_file *, struct vfsmount *); #ifdef CONFIG_QUOTA @@ -1654,6 +1656,7 @@ extern int invalidate_inode_pages2(struc extern int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); extern int write_inode_now(struct inode *, int); +extern int generic_sync_sb_inodes(struct super_block *, struct writeback_control *); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *);