linux-kernel - Re: [PATCH v3 28/33] netfs: Change the read result collector to only use one work item

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1321312.1731085403@warthog.procyon.org.uk>
Date: Fri, 08 Nov 2024 17:03:23 +0000
From: David Howells <dhowells@...hat.com>
To: Christian Brauner <christian@...uner.io>,
    Steve French <smfrench@...il.com>,
    Matthew Wilcox <willy@...radead.org>
Cc: dhowells@...hat.com, Jeff Layton <jlayton@...nel.org>,
    Gao Xiang <hsiangkao@...ux.alibaba.com>,
    Dominique Martinet <asmadeus@...ewreck.org>,
    Marc Dionne <marc.dionne@...istor.com>,
    Paulo Alcantara <pc@...guebit.com>,
    Shyam Prasad N <sprasad@...rosoft.com>, Tom Talpey <tom@...pey.com>,
    Eric Van Hensbergen <ericvh@...nel.org>,
    Ilya Dryomov <idryomov@...il.com>, netfs@...ts.linux.dev,
    linux-afs@...ts.infradead.org, linux-cifs@...r.kernel.org,
    linux-nfs@...r.kernel.org, ceph-devel@...r.kernel.org,
    v9fs@...ts.linux.dev, linux-erofs@...ts.ozlabs.org,
    linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
    netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 28/33] netfs: Change the read result collector to only use one work item

This patch needs the attached adjustment folding in.

David
---
commit 2c0cccc7b29a051fadb6816d31f526e4dd45ddf5
Author: David Howells <dhowells@...hat.com>
Date:   Thu Nov 7 22:22:49 2024 +0000

    netfs: Fix folio abandonment
    
    The mechanism to handle the abandonment of a folio being read due to an
    error occurring in a subrequest (such as if a signal occurs) will correctly
    abandon folios if they're entirely processed in one go; but if the
    constituent subrequests aren't processed in the same scheduled work item,
    the note that the first one failed will get lost if no folios are processed
    (the ABANDON_SREQ note isn't transferred to the NETFS_RREQ_FOLIO_ABANDON
    flag unless we process a folio).
    
    Fix this by simplifying the way the mechanism works.  Replace the flag with
    a variable that records the file position to which we should abandon
    folios.  Any folio that overlaps this region at the time of unlocking must
    be abandoned (and reread).
    
    This works because subrequests are processed in order of file position and
    each folio is processed as soon as the subrequests have transferred enough
    data to cover it - so when the abandonment point is moved, it covers only
    folios that draw data from the dodgy region.
    
    Also make sure that NETFS_SREQ_FAILED is set on failure and that
    stream->failed is set when we cut the stream short.
    
    Signed-off-by: David Howells <dhowells@...hat.com>
    cc: Jeff Layton <jlayton@...nel.org>
    cc: netfs@...ts.linux.dev
    cc: linux-fsdevel@...r.kernel.org

diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 73f51039c2fe..7f3a3c056c6e 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -46,7 +46,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
 	struct netfs_folio *finfo;
 	struct folio *folio = folioq_folio(folioq, slot);
 
-	if (unlikely(test_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags))) {
+	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
 		trace_netfs_folio(folio, netfs_folio_trace_abandon);
 		goto just_unlock;
 	}
@@ -126,8 +126,6 @@ static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
 
 		if (*notes & COPY_TO_CACHE)
 			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
-		if (*notes & ABANDON_SREQ)
-			set_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags);
 
 		folio = folioq_folio(folioq, slot);
 		if (WARN_ONCE(!folio_test_locked(folio),
@@ -152,7 +150,6 @@ static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
 		*notes |= MADE_PROGRESS;
 
 		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
-		clear_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags);
 
 		/* Clean up the head folioq.  If we clear an entire folioq, then
 		 * we can get rid of it provided it's not also the tail folioq
@@ -251,6 +248,12 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
 			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
 				notes |= COPY_TO_CACHE;
 
+			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+				rreq->abandon_to = front->start + front->len;
+				front->transferred = front->len;
+				transferred = front->len;
+				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
+			}
 			if (front->start + transferred >= rreq->cleaned_to + fsize ||
 			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
 				netfs_read_unlock_folios(rreq, &notes);
@@ -268,6 +271,7 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
 				stream->error = front->error;
 				rreq->error = front->error;
 				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
+				stream->failed = true;
 			}
 			notes |= MADE_PROGRESS | ABANDON_SREQ;
 		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
@@ -566,6 +570,7 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
 			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
 		} else {
 			netfs_stat(&netfs_n_rh_download_failed);
+			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
 		}
 		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
 		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index c00cffa1da13..4af7208e1360 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -260,6 +260,7 @@ struct netfs_io_request {
 	atomic64_t		issued_to;	/* Write issuer folio cursor */
 	unsigned long long	collected_to;	/* Point we've collected to */
 	unsigned long long	cleaned_to;	/* Position we've cleaned folios to */
+	unsigned long long	abandon_to;	/* Position to abandon folios to */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
 	unsigned char		front_folio_order; /* Order (size) of front folio */
 	refcount_t		ref;
@@ -271,7 +272,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
 #define NETFS_RREQ_FOLIO_COPY_TO_CACHE	6	/* Copy current folio to cache from read */
-#define NETFS_RREQ_FOLIO_ABANDON	7	/* Abandon failed folio from read */
 #define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
 #define NETFS_RREQ_BLOCKED		10	/* We blocked */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index cf14545ca2bd..22eb77b1f5e6 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -56,6 +56,7 @@
 	EM(netfs_rreq_trace_free,		"FREE   ")	\
 	EM(netfs_rreq_trace_redirty,		"REDIRTY")	\
 	EM(netfs_rreq_trace_resubmit,		"RESUBMT")	\
+	EM(netfs_rreq_trace_set_abandon,	"S-ABNDN")	\
 	EM(netfs_rreq_trace_set_pause,		"PAUSE  ")	\
 	EM(netfs_rreq_trace_unlock,		"UNLOCK ")	\
 	EM(netfs_rreq_trace_unlock_pgpriv2,	"UNLCK-2")	\