netdev - Re: INFO: task hung in io_wq

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <09cdf1d6-4660-9712-e374-4bbb120d6858@kernel.dk>
Date:   Tue, 19 Nov 2019 08:20:58 -0700
From:   Jens Axboe <axboe@...nel.dk>
To:     Eric Biggers <ebiggers@...nel.org>
Cc:     syzbot <syzbot+0f1cc17f85154f400465@...kaller.appspotmail.com>,
        andriy.shevchenko@...ux.intel.com, davem@...emloft.net,
        f.fainelli@...il.com, gregkh@...uxfoundation.org,
        idosch@...lanox.com, kimbrownkd@...il.com,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        netdev@...r.kernel.org, petrm@...lanox.com,
        syzkaller-bugs@...glegroups.com, tglx@...utronix.de,
        viro@...iv.linux.org.uk, wanghai26@...wei.com,
        yuehaibing@...wei.com
Subject: Re: INFO: task hung in io_wq_destroy

On 11/18/19 9:34 PM, Jens Axboe wrote:
> On 11/18/19 8:15 PM, Jens Axboe wrote:
>> On 11/18/19 7:23 PM, Eric Biggers wrote:
>>> Hi Jens,
>>>
>>> On Mon, Oct 28, 2019 at 03:00:08PM -0600, Jens Axboe wrote:
>>>> This is fixed in my for-next branch for a few days at least, unfortunately
>>>> linux-next is still on the old one. Next version should be better.
>>>
>>> This is still occurring on linux-next.  Here's a report on next-20191115 from
>>> https://syzkaller.appspot.com/text?tag=CrashReport&x=16fa3d1ce00000
>>
>> Hmm, I'll take a look. Looking at the reproducer, it's got a massive
>> sleep at the end. I take it this triggers before that time actually
>> passes? Because that's around 11.5 days of sleep.
>>
>> No luck reproducing this so far, I'll try on linux-next.
> 
> I see what it is - if the io-wq is setup and torn down before the
> manager thread is started, then we won't create the workers we already
> expected. The manager thread will exit without doing anything, but
> teardown will wait for the expected workers to exit before being
> allowed to proceed. That never happens.
> 
> I've got a patch for this, but I'll test it a bit and send it out
> tomorrow.

This should fix it - wait until the manager is started and has created
the required fixed workers, then check if it failed or not. That closes
the gap between startup and teardown, as we have settled things before
anyone is allowed to call io_wq_destroy().


diff --git a/fs/io-wq.c b/fs/io-wq.c
index 9174007ce107..1f640c489f7c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -33,6 +33,7 @@ enum {
 enum {
 	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
 	IO_WQ_BIT_CANCEL	= 1,	/* cancel work on list */
+	IO_WQ_BIT_ERROR		= 2,	/* error on setup */
 };
 
 enum {
@@ -562,14 +563,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 	spin_unlock_irq(&wqe->lock);
 }
 
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 {
 	struct io_wqe_acct *acct =&wqe->acct[index];
 	struct io_worker *worker;
 
 	worker = kcalloc_node(1, sizeof(*worker), GFP_KERNEL, wqe->node);
 	if (!worker)
-		return;
+		return false;
 
 	refcount_set(&worker->ref, 1);
 	worker->nulls_node.pprev = NULL;
@@ -581,7 +582,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 				"io_wqe_worker-%d/%d", index, wqe->node);
 	if (IS_ERR(worker->task)) {
 		kfree(worker);
-		return;
+		return false;
 	}
 
 	spin_lock_irq(&wqe->lock);
@@ -599,6 +600,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 		atomic_inc(&wq->user->processes);
 
 	wake_up_process(worker->task);
+	return true;
 }
 
 static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
@@ -606,9 +608,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 {
 	struct io_wqe_acct *acct = &wqe->acct[index];
 
-	/* always ensure we have one bounded worker */
-	if (index == IO_WQ_ACCT_BOUND && !acct->nr_workers)
-		return true;
 	/* if we have available workers or no work, no need */
 	if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
 		return false;
@@ -621,10 +620,19 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 static int io_wq_manager(void *data)
 {
 	struct io_wq *wq = data;
+	int i;
 
-	while (!kthread_should_stop()) {
-		int i;
+	/* create fixed workers */
+	for (i = 0; i < wq->nr_wqes; i++) {
+		if (create_io_worker(wq, wq->wqes[i], IO_WQ_ACCT_BOUND))
+			continue;
+		goto err;
+	}
 
+	refcount_set(&wq->refs, wq->nr_wqes);
+	complete(&wq->done);
+
+	while (!kthread_should_stop()) {
 		for (i = 0; i < wq->nr_wqes; i++) {
 			struct io_wqe *wqe = wq->wqes[i];
 			bool fork_worker[2] = { false, false };
@@ -644,6 +652,10 @@ static int io_wq_manager(void *data)
 		schedule_timeout(HZ);
 	}
 
+	return 0;
+err:
+	set_bit(IO_WQ_BIT_ERROR, &wq->state);
+	complete(&wq->done);
 	return 0;
 }
 
@@ -982,7 +994,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
 	wq->user = user;
 
 	i = 0;
-	refcount_set(&wq->refs, wq->nr_wqes);
 	for_each_online_node(node) {
 		struct io_wqe *wqe;
 
@@ -1020,6 +1031,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
 	wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
 	if (!IS_ERR(wq->manager)) {
 		wake_up_process(wq->manager);
+		wait_for_completion(&wq->done);
+		if (test_bit(IO_WQ_BIT_ERROR, &wq->state))
+			goto err;
+		reinit_completion(&wq->done);
 		return wq;
 	}
 
@@ -1041,10 +1056,9 @@ void io_wq_destroy(struct io_wq *wq)
 {
 	int i;
 
-	if (wq->manager) {
-		set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	if (wq->manager)
 		kthread_stop(wq->manager);
-	}
 
 	rcu_read_lock();
 	for (i = 0; i < wq->nr_wqes; i++) {

-- 
Jens Axboe