>From f5038c6e7a3d1a4a91879187b92ede8c868988ac Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 11 Jun 2018 10:56:04 +0200
Subject: [PATCH] bdi: Fix another oops in wb_workfn()

syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to
wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was
WB_shutting_down after wb->bdi->dev became NULL. This indicates that
unregister_bdi() failed to call wb_shutdown() on one of wb objects.

The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus
drops bdi's reference to wb structures before going through the list of
wbs again and calling wb_shutdown() on each of them. This way the loop
iterating through all wbs can easily miss a wb if that wb has already
passed through cgwb_remove_from_bdi_list() called from wb_shutdown()
from cgwb_release_workfn() and as a result fully shutdown bdi although
wb_workfn() for this wb structure is still running. In fact there are
also other ways cgwb_bdi_unregister() can race with
cgwb_release_workfn() leading e.g. to use-after-free issues:

CPU1                            CPU2
                                cgwb_bdi_unregister()
                                  cgwb_kill(*slot);

cgwb_release()
  queue_work(cgwb_release_wq, &wb->release_work);
cgwb_release_workfn()
                                  wb = list_first_entry(&bdi->wb_list, ...)
                                  spin_unlock_irq(&cgwb_lock);
  wb_shutdown(wb);
  ...
  kfree_rcu(wb, rcu);
                                  wb_shutdown(wb); -> oops use-after-free

We solve all these issues by making cgwb_bdi_unregister() wait for
shutdown of all wb structures instead of going through them and trying
to actively shut them down.

[1] https://syzkaller.appspot.com/bug?id=e0818ccb7e46190b3f1038b0c794299208ed4206

Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Tejun Heo <tj@kernel.org>
Reported-and-analyzed-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+4a7438e774b21ddd8eca@syzkaller.appspotmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 mm/backing-dev.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 347cc834c04a..39fa5f4ddd5c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -370,7 +370,6 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	set_bit(WB_shutting_down, &wb->state);
 	spin_unlock_bh(&wb->work_lock);
 
-	cgwb_remove_from_bdi_list(wb);
 	/*
 	 * Drain work list and shutdown the delayed_work.  !WB_registered
 	 * tells wb_workfn() that @wb is dying and its work_list needs to
@@ -379,6 +378,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	mod_delayed_work(bdi_wq, &wb->dwork, 0);
 	flush_delayed_work(&wb->dwork);
 	WARN_ON(!list_empty(&wb->work_list));
+	cgwb_remove_from_bdi_list(wb);
 	/*
 	 * Make sure bit gets cleared after shutdown is finished. Matches with
 	 * the barrier provided by test_and_clear_bit() above.
@@ -541,6 +541,9 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
 	spin_lock_irq(&cgwb_lock);
 	list_del_rcu(&wb->bdi_node);
 	spin_unlock_irq(&cgwb_lock);
+	/* Last wb of the bdi? Wake up waiters for shutdown of all wbs. */
+	if (list_empty(&wb->bdi->wb_list))
+		wake_up_all(&wb->bdi->wb_waitq);
 }
 
 static int cgwb_create(struct backing_dev_info *bdi,
@@ -710,22 +713,16 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	struct bdi_writeback *wb;
 
 	WARN_ON(test_bit(WB_registered, &bdi->wb.state));
 
 	spin_lock_irq(&cgwb_lock);
 	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
 		cgwb_kill(*slot);
-
-	while (!list_empty(&bdi->wb_list)) {
-		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
-				      bdi_node);
-		spin_unlock_irq(&cgwb_lock);
-		wb_shutdown(wb);
-		spin_lock_irq(&cgwb_lock);
-	}
 	spin_unlock_irq(&cgwb_lock);
+
+	/* Wait for all writeback structures to shutdown */
+	wait_event(bdi->wb_waitq, list_empty(&bdi->wb_list));
 }
 
 /**
-- 
2.13.7