Date:	Tue, 23 Jun 2009 14:20:09 +0200
From:	Jens Axboe <jens.axboe@...cle.com>
To:	Christoph Hellwig <hch@...radead.org>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Linux Kernel <linux-kernel@...r.kernel.org>,
	akpm@...ux-foundation.org
Subject: Re: merging the per-bdi writeback patchset

On Tue, Jun 23 2009, Jens Axboe wrote:
> On Tue, Jun 23 2009, Christoph Hellwig wrote:
> > On Tue, Jun 23, 2009 at 01:12:10PM +0200, Jens Axboe wrote:
> > > > Last time we discussed this you said you're happy with 2.6.32.  I really
> > > > want to take a more detailed look and put that on the not so urgent list
> > > > because you didn't seem to rush for .31.  So my vote goes for waiting a
> > > > bit longer.
> > > 
> > > Yeah, 2.6.32 works for me too, .31 would have been nice though so I
> > > don't have to carry it around anymore. But either is fine; if you and
> > > Andrew want more time to review this stuff, then let's just settle for
> > > .32.
> > 
> > Yes, I'd really prefer more time.  I also expect to come up with some
> > more changes in that area.  Your patch makes the difference between
> > kupdate and pdflush-style writeback look even uglier than it already
> > is, so I want to see if there's some nicer way to handle it.  I also want
> 
> Good point, should be easy enough to fold the two together.

Something like the below, untested except for checking that it compiles.
This no longer dual-paths the two reasons we wake up to flush data;
instead, it just does a kupdated-style flush if we didn't have any
queued work to do (or the queued work didn't end up writing anything).
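
To make the resulting control flow easier to see outside the diff
context, here is a minimal userspace model of what wb_do_writeback()
does after this patch. A hedged sketch only: the struct, the stub
get_next_work_item(), and the constant page count below are stand-ins
for illustration, not the kernel's actual types or values.

#include <stdio.h>

struct work_item { long nr_pages; };

/* stand-in for the real get_next_work_item(); models an empty work list */
static struct work_item *get_next_work_item(void)
{
	return NULL;
}

/* model of the merged wb_writeback(): one function, with kupdate
 * behavior selected by a flag instead of a separate wb_kupdated() */
static long wb_writeback(long nr_pages, int for_kupdate)
{
	/* the kernel version sets wbc.for_kupdate and, when set, points
	 * wbc.older_than_this at the dirty_expire_interval cutoff */
	printf("flush %ld pages, for_kupdate=%d\n", nr_pages, for_kupdate);
	return nr_pages;
}

static long wb_do_writeback(void)
{
	struct work_item *work;
	long nr_pages, wrote = 0;

	/* first, drain explicitly queued work (pdflush-style) */
	while ((work = get_next_work_item()) != NULL)
		wrote += wb_writeback(work->nr_pages, 0);

	/* if nothing got written -- including when no work was queued --
	 * do one periodic, kupdated-style pass over expired dirty data */
	if (!wrote) {
		nr_pages = 1024;	/* kernel: dirty + unstable NFS + dirty inodes */
		wrote = wb_writeback(nr_pages, 1);
	}

	return wrote;
}

int main(void)
{
	printf("wrote %ld\n", wb_do_writeback());
	return 0;
}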

 fs-writeback.c |  111 +++++++++++++++----------------------------------
 1 file changed, 36 insertions(+), 75 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d589db3..c5996e5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -269,8 +269,18 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  */
 #define MAX_WRITEBACK_PAGES     1024
 
+static inline bool over_bground_thresh(void)
+{
+	unsigned long background_thresh, dirty_thresh;
+
+	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
+
+	return (global_page_state(NR_FILE_DIRTY) +
+		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
+}
+
 /*
- * Periodic writeback of "old" data.
+ * Retrieve queued work items, or perform periodic writeback of "old" data.
  *
  * Define "old": the first time one of an inode's pages is dirtied, we mark the
  * dirtying-time in the inode's address_space.  So this periodic writeback code
@@ -284,61 +294,26 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  * older_than_this takes precedence over nr_to_write.  So we'll only write back
  * all dirty pages if they are all attached to "old" mappings.
  */
-static long wb_kupdated(struct bdi_writeback *wb)
-{
-	unsigned long oldest_jif;
-	long nr_to_write, wrote = 0;
-	struct writeback_control wbc = {
-		.bdi			= wb->bdi,
-		.sync_mode		= WB_SYNC_NONE,
-		.older_than_this	= &oldest_jif,
-		.nr_to_write		= 0,
-		.for_kupdate		= 1,
-		.range_cyclic		= 1,
-	};
-
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-
-	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
-	while (nr_to_write > 0) {
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		generic_sync_wb_inodes(wb, NULL, &wbc);
-		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		if (wbc.nr_to_write > 0)
-			break;	/* All the old data is written */
-		nr_to_write -= MAX_WRITEBACK_PAGES;
-	}
-
-	return wrote;
-}
-
-static inline bool over_bground_thresh(void)
-{
-	unsigned long background_thresh, dirty_thresh;
-
-	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-
-	return (global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
-}
-
-static long __wb_writeback(struct bdi_writeback *wb, long nr_pages,
-			   struct super_block *sb,
-			   enum writeback_sync_modes sync_mode)
+static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
+			 struct super_block *sb,
+			 enum writeback_sync_modes sync_mode, int for_kupdate)
 {
 	struct writeback_control wbc = {
 		.bdi			= wb->bdi,
 		.sync_mode		= sync_mode,
 		.older_than_this	= NULL,
+		.for_kupdate		= for_kupdate,
 		.range_cyclic		= 1,
 	};
+	unsigned long oldest_jif;
 	long wrote = 0;
 
+	if (wbc.for_kupdate) {
+		wbc.older_than_this = &oldest_jif;
+		oldest_jif = jiffies -
+				msecs_to_jiffies(dirty_expire_interval * 10);
+	}
+
 	for (;;) {
 		if (sync_mode == WB_SYNC_NONE && nr_pages <= 0 &&
 		    !over_bground_thresh())
@@ -355,7 +330,7 @@ static long __wb_writeback(struct bdi_writeback *wb, long nr_pages,
 		 * If we ran out of stuff to write, bail unless more_io got set
 		 */
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			if (wbc.more_io)
+			if (wbc.more_io && !wbc.for_kupdate)
 				continue;
 			break;
 		}
@@ -390,17 +365,18 @@ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
 /*
  * Retrieve work items and do the writeback they describe
  */
-static long wb_writeback(struct bdi_writeback *wb, int force_wait)
+long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
 	struct bdi_work *work;
-	long wrote = 0;
+	long nr_pages, wrote = 0;
 
 	while ((work = get_next_work_item(bdi, wb)) != NULL) {
 		struct super_block *sb = bdi_work_sb(work);
-		long nr_pages = work->nr_pages;
 		enum writeback_sync_modes sync_mode;
 
+		nr_pages = work->nr_pages;
+
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 */
@@ -416,7 +392,7 @@ static long wb_writeback(struct bdi_writeback *wb, int force_wait)
 		if (sync_mode == WB_SYNC_NONE)
 			wb_clear_pending(wb, work);
 
-		wrote += __wb_writeback(wb, nr_pages, sb, sync_mode);
+		wrote += wb_writeback(wb, nr_pages, sb, sync_mode, 0);
 
 		/*
 		 * This is a data integrity writeback, so only do the
@@ -426,31 +402,16 @@ static long wb_writeback(struct bdi_writeback *wb, int force_wait)
 			wb_clear_pending(wb, work);
 	}
 
-	return wrote;
-}
-
-/*
- * This will be inlined in bdi_writeback_task() once we get rid of any
- * dirty inodes on the default_backing_dev_info
- */
-long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
-{
-	long wrote;
-
 	/*
-	 * We get here in two cases:
-	 *
-	 *  schedule_timeout() returned because the dirty writeback
-	 *  interval has elapsed. If that happens, the work item list
-	 *  will be empty and we will proceed to do kupdated style writeout.
-	 *
-	 *  Someone called bdi_start_writeback(), which put one/more work
-	 *  items on the work_list. Process those.
+	 * Check for periodic writeback, kupdated() style
 	 */
-	if (list_empty(&wb->bdi->work_list))
-		wrote = wb_kupdated(wb);
-	else
-		wrote = wb_writeback(wb, force_wait);
+	if (!wrote) {
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS) +
+				(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+		wrote = wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
+	}
 
 	return wrote;
 }

-- 
Jens Axboe

