[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4020304.1732897192@warthog.procyon.org.uk>
Date: Fri, 29 Nov 2024 16:19:52 +0000
From: David Howells <dhowells@...hat.com>
To: syzbot <syzbot+8965fea6a159ab9aa32d@...kaller.appspotmail.com>
Cc: dhowells@...hat.com, asmadeus@...ewreck.org, bharathsm@...rosoft.com,
brauner@...nel.org, ericvh@...nel.org, jlayton@...nel.org,
linux-afs@...ts.infradead.org, linux-cifs@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org, linux_oss@...debyte.com,
lucho@...kov.net, marc.dionne@...istor.com,
mathieu.desnoyers@...icios.com, mhiramat@...nel.org,
netfs@...ts.linux.dev, pc@...guebit.com, ronniesahlberg@...il.com,
rostedt@...dmis.org, samba-technical@...ts.samba.org,
sfrench@...ba.org, sprasad@...rosoft.com,
syzkaller-bugs@...glegroups.com, tom@...pey.com,
v9fs@...ts.linux.dev
Subject: Re: [syzbot] [netfs?] INFO: task hung in netfs_unbuffered_read_iter
#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git netfs-writeback
commit 1daca71a815b0d8cfe3db81a31b6dd3fc0da4b50
Author: David Howells <dhowells@...hat.com>
Date: Fri Nov 29 16:19:03 2024 +0000
netfs: Fix hang in synchronous read due to failed subreq
When netfs is performing a synchronous read, it doesn't offload the
collection of results off to a workqueue, but rather does the collection in
the app thread - thereby avoiding the overhead of using a work queue.
However, if a failure occurs and we might want to retry or if it wants to
throttle the production of new subreqs, netfs can throw a pause on the
producer by setting NETFS_RREQ_PAUSE. This is fine if collection is done
by workqueue, but in synchronous mode, the collection and retry is done
after the producer loop completes - thereby deadlocking the two parts.
Fix this by making the synchronous read producer loop, when it sees the
PAUSE flag, go and wait for the flag to be cleared or the op to complete
whilst running the collector to process results.
This fixes "netfs: Change the read result collector to only use one work
item" - but there's no stable commit ID yet.
Reported-by: syzbot+8965fea6a159ab9aa32d@...kaller.appspotmail.com
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Jeff Layton <jlayton@...nel.org>
cc: netfs@...ts.linux.dev
cc: linux-fsdevel@...r.kernel.org
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index dedcfc2bab2d..0bf3c2f5a710 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -102,10 +102,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
rreq->netfs_ops->issue_read(subreq);
- if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
- wait_on_bit(&rreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
- }
+ if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ netfs_wait_for_pause(rreq);
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 334bf9f6e6f2..db59ed8880e3 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -96,6 +96,7 @@ void netfs_read_collection_worker(struct work_struct *work);
void netfs_wake_read_collector(struct netfs_io_request *rreq);
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+void netfs_wait_for_pause(struct netfs_io_request *rreq);
/*
* read_pgpriv2.c
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index f1b15c20b6f8..3803ef5894c8 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -312,7 +312,7 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
- wake_up_bit(&rreq->flags, NETFS_RREQ_PAUSE);
+ wake_up(&rreq->waitq);
}
if (notes & MADE_PROGRESS) {
@@ -659,3 +659,39 @@ ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
return ret;
}
+
+/*
+ * Wait for a paused read operation to unpause or complete in some manner.
+ */
+void netfs_wait_for_pause(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ __set_current_state(TASK_RUNNING);
+ netfs_read_collection(rreq);
+ continue;
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 4a1499167770..bb74f30a4216 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -324,7 +324,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
- wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
+ wake_up(&wreq->waitq);
}
if (notes & NEED_REASSESS) {
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index cd2b349243b3..6506bf1d970e 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -721,7 +721,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
rolling_buffer_advance(&wreq->buffer, part);
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
+ wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
}
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
Powered by blists - more mailing lists