linux-kernel - Re: linux-next: manual merge of the writeback tree with the vfs tree

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20110723144454.GB5946@localhost>
Date:	Sat, 23 Jul 2011 22:44:54 +0800
From:	Wu Fengguang <fengguang.wu@...el.com>
To:	Stephen Rothwell <sfr@...b.auug.org.au>
Cc:	"linux-next@...r.kernel.org" <linux-next@...r.kernel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Dave Chinner <dchinner@...hat.com>,
	Christoph Hellwig <hch@...radead.org>,
	Al Viro <viro@...IV.linux.org.uk>
Subject: Re: linux-next: manual merge of the writeback tree with the vfs
 tree

Hi Stephen,

On Mon, Jul 18, 2011 at 03:34:09PM +0800, Stephen Rothwell wrote:
> Hi Wu,
> 
> Today's linux-next merge of the writeback tree got a conflict in
> fs/fs-writeback.c between commit dffe5a6c5214 ("superblock: move
> pin_sb_for_writeback() to fs/super.c") from the vfs tree and commits
> f758eeabeb96 ("writeback: split inode_wb_list_lock into
> bdi_writeback.list_lock") and d46db3d58233 ("writeback: make
> writeback_control.nr_to_write straight") from the writeback tree.
> 
> I fixed it up (I think - see below) and can carry the fix as necessary.

OK, please do carry the fix. Thank you very much!

Cheers,
Fengguang

> diff --cc fs/fs-writeback.c
> index b8c507c,6d49439..0000000
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@@ -460,6 -480,63 +480,37 @@@ writeback_single_inode(struct inode *in
>   	return ret;
>   }
>   
>  -/*
>  - * For background writeback the caller does not have the sb pinned
>  - * before calling writeback. So make sure that we do pin it, so it doesn't
>  - * go away while we are writing inodes from it.
>  - */
>  -static bool pin_sb_for_writeback(struct super_block *sb)
>  -{
>  -	spin_lock(&sb_lock);
>  -	if (list_empty(&sb->s_instances)) {
>  -		spin_unlock(&sb_lock);
>  -		return false;
>  -	}
>  -
>  -	sb->s_count++;
>  -	spin_unlock(&sb_lock);
>  -
>  -	if (down_read_trylock(&sb->s_umount)) {
>  -		if (sb->s_root)
>  -			return true;
>  -		up_read(&sb->s_umount);
>  -	}
>  -
>  -	put_super(sb);
>  -	return false;
>  -}
>  -
> + static long writeback_chunk_size(struct backing_dev_info *bdi,
> + 				 struct wb_writeback_work *work)
> + {
> + 	long pages;
> + 
> + 	/*
> + 	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
> + 	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
> + 	 * here avoids calling into writeback_inodes_wb() more than once.
> + 	 *
> + 	 * The intended call sequence for WB_SYNC_ALL writeback is:
> + 	 *
> + 	 *      wb_writeback()
> + 	 *          writeback_sb_inodes()       <== called only once
> + 	 *              write_cache_pages()     <== called once for each inode
> + 	 *                   (quickly) tag currently dirty pages
> + 	 *                   (maybe slowly) sync all tagged pages
> + 	 */
> + 	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
> + 		pages = LONG_MAX;
> + 	else {
> + 		pages = min(bdi->avg_write_bandwidth / 2,
> + 			    global_dirty_limit / DIRTY_SCOPE);
> + 		pages = min(pages, work->nr_pages);
> + 		pages = round_down(pages + MIN_WRITEBACK_PAGES,
> + 				   MIN_WRITEBACK_PAGES);
> + 	}
> + 
> + 	return pages;
> + }
> + 
>   /*
>    * Write a portion of b_io inodes which belong to @sb.
>    *
> @@@ -559,40 -643,41 +617,41 @@@ static long __writeback_inodes_wb(struc
>   		struct inode *inode = wb_inode(wb->b_io.prev);
>   		struct super_block *sb = inode->i_sb;
>   
>  -		if (!pin_sb_for_writeback(sb)) {
>  +		if (!grab_super_passive(sb)) {
> - 			requeue_io(inode);
> + 			requeue_io(inode, wb);
>   			continue;
>   		}
> - 		ret = writeback_sb_inodes(sb, wb, wbc, false);
> + 		wrote += writeback_sb_inodes(sb, wb, work);
>   		drop_super(sb);
>   
> - 		if (ret)
> - 			break;
> + 		/* refer to the same tests at the end of writeback_sb_inodes */
> + 		if (wrote) {
> + 			if (time_is_before_jiffies(start_time + HZ / 10UL))
> + 				break;
> + 			if (work->nr_pages <= 0)
> + 				break;
> + 		}
>   	}
> - 	spin_unlock(&inode_wb_list_lock);
>   	/* Leave any unwritten inodes on b_io */
> + 	return wrote;
>   }
>   
> - static void __writeback_inodes_sb(struct super_block *sb,
> - 		struct bdi_writeback *wb, struct writeback_control *wbc)
> + long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
>   {
> - 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
> + 	struct wb_writeback_work work = {
> + 		.nr_pages	= nr_pages,
> + 		.sync_mode	= WB_SYNC_NONE,
> + 		.range_cyclic	= 1,
> + 	};
>   
> - 	spin_lock(&inode_wb_list_lock);
> - 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
> - 		queue_io(wb, wbc->older_than_this);
> - 	writeback_sb_inodes(sb, wb, wbc, true);
> - 	spin_unlock(&inode_wb_list_lock);
> - }
> + 	spin_lock(&wb->list_lock);
> + 	if (list_empty(&wb->b_io))
> + 		queue_io(wb, NULL);
> + 	__writeback_inodes_wb(wb, &work);
> + 	spin_unlock(&wb->list_lock);
>   
> - /*
> -  * The maximum number of pages to writeout in a single bdi flush/kupdate
> -  * operation.  We do this so we don't hold I_SYNC against an inode for
> -  * enormous amounts of time, which would block a userspace task which has
> -  * been forced to throttle against that inode.  Also, the code reevaluates
> -  * the dirty each time it has written this many pages.
> -  */
> - #define MAX_WRITEBACK_PAGES     1024
> + 	return nr_pages - work.nr_pages;
> + }
>   
>   static inline bool over_bground_thresh(void)
>   {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/