lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1243330430-9964-7-git-send-email-jens.axboe@oracle.com>
Date:	Tue, 26 May 2009 11:33:44 +0200
From:	Jens Axboe <jens.axboe@...cle.com>
To:	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Cc:	chris.mason@...cle.com, david@...morbit.com, hch@...radead.org,
	akpm@...ux-foundation.org, jack@...e.cz,
	yanmin_zhang@...ux.intel.com, richard@....demon.co.uk,
	Jens Axboe <jens.axboe@...cle.com>
Subject: [PATCH 06/12] writeback: separate the flushing state/task from the bdi

Add a struct bdi_writeback for tracking and handling dirty IO. This
is in preparation for adding > 1 flusher task per bdi.

Signed-off-by: Jens Axboe <jens.axboe@...cle.com>
---
 fs/fs-writeback.c           |  145 ++++++++++++++++++++++++++----------------
 include/linux/backing-dev.h |   40 +++++++-----
 mm/backing-dev.c            |  128 ++++++++++++++++++++++++++++++--------
 3 files changed, 215 insertions(+), 98 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7a558a6..e72db8b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -46,9 +46,11 @@ int nr_pdflush_threads;
  * unless they implement their own.  Which is somewhat inefficient, as this
  * may prevent concurrent writeback against multiple devices.
  */
-static int writeback_acquire(struct backing_dev_info *bdi)
+static int writeback_acquire(struct bdi_writeback *wb)
 {
-	return !test_and_set_bit(BDI_pdflush, &bdi->state);
+	struct backing_dev_info *bdi = wb->bdi;
+
+	return !test_and_set_bit(wb->nr, &bdi->wb_active);
 }
 
 /**
@@ -59,19 +61,40 @@ static int writeback_acquire(struct backing_dev_info *bdi)
  */
 int writeback_in_progress(struct backing_dev_info *bdi)
 {
-	return test_bit(BDI_pdflush, &bdi->state);
+	return bdi->wb_active != 0;
 }
 
 /**
  * writeback_release - relinquish exclusive writeback access against a device.
  * @bdi: the device's backing_dev_info structure
  */
-static void writeback_release(struct backing_dev_info *bdi)
+static void writeback_release(struct bdi_writeback *wb)
 {
-	WARN_ON_ONCE(!writeback_in_progress(bdi));
-	bdi->wb_arg.nr_pages = 0;
-	bdi->wb_arg.sb = NULL;
-	clear_bit(BDI_pdflush, &bdi->state);
+	struct backing_dev_info *bdi = wb->bdi;
+
+	wb->nr_pages = 0;
+	wb->sb = NULL;
+	clear_bit(wb->nr, &bdi->wb_active);
+}
+
+static void wb_start_writeback(struct bdi_writeback *wb, struct super_block *sb,
+			       long nr_pages,
+			       enum writeback_sync_modes sync_mode)
+{
+	if (!wb_has_dirty_io(wb))
+		return;
+
+	if (writeback_acquire(wb)) {
+		wb->nr_pages = nr_pages;
+		wb->sb = sb;
+		wb->sync_mode = sync_mode;
+
+		/*
+		 * make above store seen before the task is woken
+		 */
+		smp_mb();
+		wake_up(&wb->wait);
+	}
 }
 
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
@@ -81,22 +104,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 	 * This only happens the first time someone kicks this bdi, so put
 	 * it out-of-line.
 	 */
-	if (unlikely(!bdi->task)) {
+	if (unlikely(!bdi->wb.task)) {
 		bdi_add_default_flusher_task(bdi);
 		return 1;
 	}
 
-	if (writeback_acquire(bdi)) {
-		bdi->wb_arg.nr_pages = nr_pages;
-		bdi->wb_arg.sb = sb;
-		bdi->wb_arg.sync_mode = sync_mode;
-		/*
-		 * make above store seen before the task is woken
-		 */
-		smp_mb();
-		wake_up(&bdi->wait);
-	}
-
+	wb_start_writeback(&bdi->wb, sb, nr_pages, sync_mode);
 	return 0;
 }
 
@@ -124,12 +137,12 @@ int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  * older_than_this takes precedence over nr_to_write.  So we'll only write back
  * all dirty pages if they are all attached to "old" mappings.
  */
-static void bdi_kupdated(struct backing_dev_info *bdi)
+static void wb_kupdated(struct bdi_writeback *wb)
 {
 	unsigned long oldest_jif;
 	long nr_to_write;
 	struct writeback_control wbc = {
-		.bdi			= bdi,
+		.bdi			= wb->bdi,
 		.sync_mode		= WB_SYNC_NONE,
 		.older_than_this	= &oldest_jif,
 		.nr_to_write		= 0,
@@ -164,15 +177,19 @@ static inline bool over_bground_thresh(void)
 		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
 }
 
-static void bdi_pdflush(struct backing_dev_info *bdi)
+static void generic_sync_wb_inodes(struct bdi_writeback *wb,
+				   struct super_block *sb,
+				   struct writeback_control *wbc);
+
+static void wb_writeback(struct bdi_writeback *wb)
 {
 	struct writeback_control wbc = {
-		.bdi			= bdi,
-		.sync_mode		= bdi->wb_arg.sync_mode,
+		.bdi			= wb->bdi,
+		.sync_mode		= wb->sync_mode,
 		.older_than_this	= NULL,
 		.range_cyclic		= 1,
 	};
-	long nr_pages = bdi->wb_arg.nr_pages;
+	long nr_pages = wb->nr_pages;
 
 	for (;;) {
 		if (wbc.sync_mode == WB_SYNC_NONE && nr_pages <= 0 &&
@@ -183,7 +200,7 @@ static void bdi_pdflush(struct backing_dev_info *bdi)
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		generic_sync_bdi_inodes(bdi->wb_arg.sb, &wbc);
+		generic_sync_wb_inodes(wb, wb->sb, &wbc);
 		nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		/*
 		 * If we ran out of stuff to write, bail unless more_io got set
@@ -200,13 +217,13 @@ static void bdi_pdflush(struct backing_dev_info *bdi)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct backing_dev_info *bdi)
+int bdi_writeback_task(struct bdi_writeback *wb)
 {
 	while (!kthread_should_stop()) {
 		unsigned long wait_jiffies;
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(&bdi->wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&wb->wait, &wait, TASK_INTERRUPTIBLE);
 		wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
 		schedule_timeout(wait_jiffies);
 		try_to_freeze();
@@ -225,13 +242,13 @@ int bdi_writeback_task(struct backing_dev_info *bdi)
 		 *  pdflush style writeout.
 		 *
 		 */
-		if (writeback_acquire(bdi))
-			bdi_kupdated(bdi);
+		if (writeback_acquire(wb))
+			wb_kupdated(wb);
 		else
-			bdi_pdflush(bdi);
+			wb_writeback(wb);
 
-		writeback_release(bdi);
-		finish_wait(&bdi->wait, &wait);
+		writeback_release(wb);
+		finish_wait(&wb->wait, &wait);
 	}
 
 	return 0;
@@ -253,6 +270,14 @@ void bdi_writeback_all(struct super_block *sb, long nr_pages,
 	mutex_unlock(&bdi_lock);
 }
 
+/*
+ * We have only a single wb per bdi, so just return that.
+ */
+static inline struct bdi_writeback *inode_get_wb(struct inode *inode)
+{
+	return &inode_to_bdi(inode)->wb;
+}
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -351,9 +376,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			struct bdi_writeback *wb = inode_get_wb(inode);
+
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list,
-					&inode_to_bdi(inode)->b_dirty);
+			list_move(&inode->i_list, &wb->b_dirty);
 		}
 	}
 out:
@@ -380,16 +406,16 @@ static int write_inode(struct inode *inode, int sync)
  */
 static void redirty_tail(struct inode *inode)
 {
-	struct backing_dev_info *bdi = inode_to_bdi(inode);
+	struct bdi_writeback *wb = inode_get_wb(inode);
 
-	if (!list_empty(&bdi->b_dirty)) {
+	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
-		tail = list_entry(bdi->b_dirty.next, struct inode, i_list);
+		tail = list_entry(wb->b_dirty.next, struct inode, i_list);
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	list_move(&inode->i_list, &bdi->b_dirty);
+	list_move(&inode->i_list, &wb->b_dirty);
 }
 
 /*
@@ -397,7 +423,9 @@ static void redirty_tail(struct inode *inode)
  */
 static void requeue_io(struct inode *inode)
 {
-	list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io);
+	struct bdi_writeback *wb = inode_get_wb(inode);
+
+	list_move(&inode->i_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -444,11 +472,10 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 /*
  * Queue all expired dirty inodes for io, eldest first.
  */
-static void queue_io(struct backing_dev_info *bdi,
-		     unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
-	list_splice_init(&bdi->b_more_io, bdi->b_io.prev);
-	move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this);
+	list_splice_init(&wb->b_more_io, wb->b_io.prev);
+	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
 /*
@@ -609,20 +636,20 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return __sync_single_inode(inode, wbc);
 }
 
-void generic_sync_bdi_inodes(struct super_block *sb,
-			     struct writeback_control *wbc)
+static void generic_sync_wb_inodes(struct bdi_writeback *wb,
+				   struct super_block *sb,
+				   struct writeback_control *wbc)
 {
 	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
-	struct backing_dev_info *bdi = wbc->bdi;
 	const unsigned long start = jiffies;	/* livelock avoidance */
 
 	spin_lock(&inode_lock);
 
-	if (!wbc->for_kupdate || list_empty(&bdi->b_io))
-		queue_io(bdi, wbc->older_than_this);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
 
-	while (!list_empty(&bdi->b_io)) {
-		struct inode *inode = list_entry(bdi->b_io.prev,
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
 						struct inode, i_list);
 		long pages_skipped;
 
@@ -634,7 +661,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			continue;
 		}
 
-		if (!bdi_cap_writeback_dirty(bdi)) {
+		if (!bdi_cap_writeback_dirty(wb->bdi)) {
 			redirty_tail(inode);
 			if (is_blkdev_sb) {
 				/*
@@ -656,7 +683,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			continue;
 		}
 
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
 			wbc->encountered_congestion = 1;
 			if (!is_blkdev_sb)
 				break;		/* Skip a congested fs */
@@ -690,7 +717,7 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 			wbc->more_io = 1;
 			break;
 		}
-		if (!list_empty(&bdi->b_more_io))
+		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
 
@@ -698,6 +725,14 @@ void generic_sync_bdi_inodes(struct super_block *sb,
 	/* Leave any unwritten inodes on b_io */
 }
 
+void generic_sync_bdi_inodes(struct super_block *sb,
+			     struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = wbc->bdi;
+
+	generic_sync_wb_inodes(&bdi->wb, sb, wbc);
+}
+
 /*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f164925..77dc62c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -24,8 +24,8 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pdflush,		/* A pdflush thread is working this device */
 	BDI_pending,		/* On its way to being activated */
+	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
 	BDI_unused,		/* Available bits start here */
@@ -41,15 +41,23 @@ enum bdi_stat_item {
 
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-struct bdi_writeback_arg {
-	unsigned long nr_pages;
-	struct super_block *sb;
+struct bdi_writeback {
+	struct backing_dev_info *bdi;		/* our parent bdi */
+	unsigned int nr;
+
+	struct task_struct	*task;		/* writeback task */
+	wait_queue_head_t	wait;
+	struct list_head	b_dirty;	/* dirty inodes */
+	struct list_head	b_io;		/* parked for writeback */
+	struct list_head	b_more_io;	/* parked for more writeback */
+
+	unsigned long		nr_pages;
+	struct super_block	*sb;
 	enum writeback_sync_modes sync_mode;
 };
 
 struct backing_dev_info {
 	struct list_head bdi_list;
-
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -66,14 +74,11 @@ struct backing_dev_info {
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
-	struct device *dev;
+	struct bdi_writeback wb;  /* default writeback info for this bdi */
+	unsigned long wb_active;  /* bitmap of active tasks */
+	unsigned long wb_mask;	  /* number of registered tasks */
 
-	struct task_struct	*task;		/* writeback task */
-	wait_queue_head_t	wait;
-	struct bdi_writeback_arg wb_arg;	/* protected by BDI_pdflush */
-	struct list_head	b_dirty;	/* dirty inodes */
-	struct list_head	b_io;		/* parked for writeback */
-	struct list_head	b_more_io;	/* parked for more writeback */
+	struct device *dev;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
@@ -90,19 +95,20 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 			 long nr_pages, enum writeback_sync_modes sync_mode);
-int bdi_writeback_task(struct backing_dev_info *bdi);
+int bdi_writeback_task(struct bdi_writeback *wb);
 void bdi_writeback_all(struct super_block *sb, long nr_pages,
 			enum writeback_sync_modes sync_mode);
 void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
+int bdi_has_dirty_io(struct backing_dev_info *bdi);
 
 extern struct mutex bdi_lock;
 extern struct list_head bdi_list;
 
-static inline int bdi_has_dirty_io(struct backing_dev_info *bdi)
+static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
-	return !list_empty(&bdi->b_dirty) ||
-	       !list_empty(&bdi->b_io) ||
-	       !list_empty(&bdi->b_more_io);
+	return !list_empty(&wb->b_dirty) ||
+	       !list_empty(&wb->b_io) ||
+	       !list_empty(&wb->b_more_io);
 }
 
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bae3d4f..c8201f0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -199,10 +199,46 @@ static int __init default_bdi_init(void)
 }
 subsys_initcall(default_bdi_init);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	memset(wb, 0, sizeof(*wb));
+
+	wb->bdi = bdi;
+	init_waitqueue_head(&wb->wait);
+	INIT_LIST_HEAD(&wb->b_dirty);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_more_io);
+}
+
+static int wb_assign_nr(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+{
+	set_bit(0, &bdi->wb_mask);
+	wb->nr = 0;
+	return 0;
+}
+
+static void bdi_put_wb(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+{
+	clear_bit(wb->nr, &bdi->wb_mask);
+	clear_bit(BDI_wb_alloc, &bdi->state);
+}
+
+static struct bdi_writeback *bdi_new_wb(struct backing_dev_info *bdi)
+{
+	struct bdi_writeback *wb;
+
+	set_bit(BDI_wb_alloc, &bdi->state);
+	wb = &bdi->wb;
+	wb_assign_nr(bdi, wb);
+	return wb;
+}
+
 static int bdi_start_fn(void *ptr)
 {
-	struct backing_dev_info *bdi = ptr;
+	struct bdi_writeback *wb = ptr;
+	struct backing_dev_info *bdi = wb->bdi;
 	struct task_struct *tsk = current;
+	int ret;
 
 	/*
 	 * Add us to the active bdi_list
@@ -226,7 +262,15 @@ static int bdi_start_fn(void *ptr)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bdi->state, BDI_pending);
 
-	return bdi_writeback_task(bdi);
+	ret = bdi_writeback_task(wb);
+
+	bdi_put_wb(bdi, wb);
+	return ret;
+}
+
+int bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	return wb_has_dirty_io(&bdi->wb);
 }
 
 static void bdi_flush_io(struct backing_dev_info *bdi)
@@ -244,11 +288,12 @@ static void bdi_flush_io(struct backing_dev_info *bdi)
 
 static int bdi_forker_task(void *ptr)
 {
-	struct backing_dev_info *me = ptr;
+	struct bdi_writeback *me = ptr;
 	DEFINE_WAIT(wait);
 
 	for (;;) {
 		struct backing_dev_info *bdi, *tmp;
+		struct bdi_writeback *wb;
 
 		/*
 		 * Do this periodically, like kupdated() did before.
@@ -259,8 +304,8 @@ static int bdi_forker_task(void *ptr)
 		 * Temporary measure, we want to make sure we don't see
 		 * dirty data on the default backing_dev_info
 		 */
-		if (bdi_has_dirty_io(me))
-			bdi_flush_io(me);
+		if (wb_has_dirty_io(me))
+			bdi_flush_io(me->bdi);
 
 		prepare_to_wait(&me->wait, &wait, TASK_INTERRUPTIBLE);
 
@@ -271,7 +316,7 @@ static int bdi_forker_task(void *ptr)
 		 * a thread registered. If so, set that up.
 		 */
 		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
-			if (bdi->task || !bdi_has_dirty_io(bdi))
+			if (bdi->wb.task || !bdi_has_dirty_io(bdi))
 				continue;
 
 			bdi_add_default_flusher_task(bdi);
@@ -292,17 +337,22 @@ static int bdi_forker_task(void *ptr)
 		list_del_init(&bdi->bdi_list);
 		mutex_unlock(&bdi_lock);
 
-		BUG_ON(bdi->task);
+		wb = bdi_new_wb(bdi);
+		if (!wb)
+			goto readd_flush;
 
-		bdi->task = kthread_run(bdi_start_fn, bdi, "bdi-%s",
+		wb->task = kthread_run(bdi_start_fn, wb, "bdi-%s",
 					dev_name(bdi->dev));
+
 		/*
 		 * If task creation fails, then readd the bdi to
 		 * the pending list and force writeout of the bdi
 		 * from this forker thread. That will free some memory
 		 * and we can try again.
 		 */
-		if (!bdi->task) {
+		if (!wb->task) {
+			bdi_put_wb(bdi, wb);
+readd_flush:
 			/*
 			 * Add this 'bdi' to the back, so we get
 			 * a chance to flush other bdi's to free
@@ -320,8 +370,18 @@ static int bdi_forker_task(void *ptr)
 	return 0;
 }
 
+/*
+ * Add a new flusher task that gets created for any bdi
+ * that has dirty data pending writeout
+ */
 void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
 {
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * Someone already marked this pending for task creation
+	 */
 	if (test_and_set_bit(BDI_pending, &bdi->state))
 		return;
 
@@ -329,7 +389,7 @@ void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
 	list_move_tail(&bdi->bdi_list, &bdi_pending_list);
 	mutex_unlock(&bdi_lock);
 
-	wake_up(&default_backing_dev_info.wait);
+	wake_up(&default_backing_dev_info.wb.wait);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -362,13 +422,23 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 	 * on-demand when they need it.
 	 */
 	if (bdi_cap_flush_forker(bdi)) {
-		bdi->task = kthread_run(bdi_forker_task, bdi, "bdi-%s",
+		struct bdi_writeback *wb;
+
+		wb = bdi_new_wb(bdi);
+		if (!wb) {
+			ret = -ENOMEM;
+			goto remove_err;
+		}
+
+		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
 						dev_name(dev));
-		if (!bdi->task) {
+		if (!wb->task) {
+			bdi_put_wb(bdi, wb);
+			ret = -ENOMEM;
+remove_err:
 			mutex_lock(&bdi_lock);
 			list_del(&bdi->bdi_list);
 			mutex_unlock(&bdi_lock);
-			ret = -ENOMEM;
 			goto exit;
 		}
 	}
@@ -391,28 +461,37 @@ static int sched_wait(void *word)
 	return 0;
 }
 
+/*
+ * Remove bdi from global list and shutdown any threads we have running
+ */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
 	/*
 	 * If setup is pending, wait for that to complete first
 	 */
 	wait_on_bit(&bdi->state, BDI_pending, sched_wait, TASK_UNINTERRUPTIBLE);
 
+	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
 	mutex_lock(&bdi_lock);
 	list_del(&bdi->bdi_list);
 	mutex_unlock(&bdi_lock);
+
+	/*
+	 * Finally, kill the kernel thread
+	 */
+	kthread_stop(bdi->wb.task);
 }
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
-		if (!bdi_cap_flush_forker(bdi)) {
+		if (!bdi_cap_flush_forker(bdi))
 			bdi_wb_shutdown(bdi);
-			if (bdi->task) {
-				kthread_stop(bdi->task);
-				bdi->task = NULL;
-			}
-		}
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -429,11 +508,10 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
-	init_waitqueue_head(&bdi->wait);
 	INIT_LIST_HEAD(&bdi->bdi_list);
-	INIT_LIST_HEAD(&bdi->b_io);
-	INIT_LIST_HEAD(&bdi->b_dirty);
-	INIT_LIST_HEAD(&bdi->b_more_io);
+	bdi->wb_mask = bdi->wb_active = 0;
+
+	bdi_wb_init(&bdi->wb, bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -458,9 +536,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
-	WARN_ON(!list_empty(&bdi->b_dirty));
-	WARN_ON(!list_empty(&bdi->b_io));
-	WARN_ON(!list_empty(&bdi->b_more_io));
+	WARN_ON(bdi_has_dirty_io(bdi));
 
 	bdi_unregister(bdi);
 
-- 
1.6.3.rc0.1.gf800

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ