lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <159078979838.679399.11549530849526926884.stgit@warthog.procyon.org.uk>
Date:   Fri, 29 May 2020 23:03:18 +0100
From:   David Howells <dhowells@...hat.com>
To:     linux-afs@...ts.infradead.org
Cc:     dhowells@...hat.com, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH 27/27] afs: Adjust the fileserver rotation algorithm to
 reprobe/retry more quickly

Adjust the fileserver rotation algorithm so that once we've run out of
untried addresses on a server (cumulatively over multiple operations), we
immediately reprobe all of that server's interfaces and retry the op at
least once before we move on to the next server.

Signed-off-by: David Howells <dhowells@...hat.com>
---

 fs/afs/fs_probe.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/internal.h |   24 ++++++++++++++----------
 fs/afs/rotate.c   |   29 +++++++++++++++++++++++++++--
 3 files changed, 88 insertions(+), 12 deletions(-)

diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c41cf3b2ab89..b34f74b0f319 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -338,6 +338,18 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server
 	afs_put_server(net, server, afs_server_trace_put_probe);
 }
 
+/*
+ * Probe a server immediately rather than waiting for its due time (used when
+ * all addresses were tried).  Dispatch presumably drops fs_lock - confirm.
+ */
+void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
+{
+	write_seqlock(&net->fs_lock);
+	if (!list_empty(&server->probe_link))	/* Skip if not on a list */
+		return afs_dispatch_fs_probe(net, server, true);
+	write_sequnlock(&net->fs_lock);
+}
+
 /*
  * Probe dispatcher to regularly dispatch probes to keep NAT alive.
  */
@@ -411,3 +423,38 @@ void afs_fs_probe_dispatcher(struct work_struct *work)
 		_leave(" [quiesce]");
 	}
 }
+
+/*
+ * Wait up to 2s for a probe of one fileserver; 0 if it responded, else -errno.
+ */
+int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+{
+	struct wait_queue_entry wait;
+	unsigned long timo = 2 * HZ;	/* Time left, per schedule_timeout() */
+
+	if (atomic_read(&server->probe_outstanding) == 0)
+		goto dont_wait;	/* No probe in flight */
+
+	init_wait_entry(&wait, 0);
+	for (;;) {
+		prepare_to_wait_event(&server->probe_wq, &wait,
+				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		if (timo == 0 ||
+		    server->probe.responded ||
+		    atomic_read(&server->probe_outstanding) == 0 ||
+		    (is_intr && signal_pending(current)))
+			break;
+		timo = schedule_timeout(timo);
+	}
+
+	finish_wait(&server->probe_wq, &wait);
+
+dont_wait:
+	if (server->probe.responded)
+		return 0;
+	if (is_intr && signal_pending(current))
+		return -ERESTARTSYS;
+	if (timo == 0)
+		return -ETIME;
+	return -EDESTADDRREQ;	/* Probes finished without a response */
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index af0b7fca87db..e1621b0670cc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -826,16 +826,18 @@ struct afs_operation {
 	unsigned short		nr_iterations;	/* Number of server iterations */
 
 	unsigned int		flags;
-#define AFS_OPERATION_STOP	0x0001		/* Set to cease iteration */
-#define AFS_OPERATION_VBUSY	0x0002		/* Set if seen VBUSY */
-#define AFS_OPERATION_VMOVED	0x0004		/* Set if seen VMOVED */
-#define AFS_OPERATION_VNOVOL	0x0008		/* Set if seen VNOVOL */
-#define AFS_OPERATION_CUR_ONLY	0x0010		/* Set if current server only (file lock held) */
-#define AFS_OPERATION_NO_VSLEEP	0x0020		/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
-#define AFS_OPERATION_UNINTR	0x0040		/* Set if op is uninterruptible */
-#define AFS_OPERATION_DOWNGRADE	0x0080		/* Set to retry with downgraded opcode */
-#define AFS_OPERATION_LOCK_0	0x0100		/* Set if have io_lock on file[0] */
-#define AFS_OPERATION_LOCK_1	0x0200		/* Set if have io_lock on file[1] */
+#define AFS_OPERATION_STOP		0x0001	/* Set to cease iteration */
+#define AFS_OPERATION_VBUSY		0x0002	/* Set if seen VBUSY */
+#define AFS_OPERATION_VMOVED		0x0004	/* Set if seen VMOVED */
+#define AFS_OPERATION_VNOVOL		0x0008	/* Set if seen VNOVOL */
+#define AFS_OPERATION_CUR_ONLY		0x0010	/* Set if current server only (file lock held) */
+#define AFS_OPERATION_NO_VSLEEP		0x0020	/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_OPERATION_UNINTR		0x0040	/* Set if op is uninterruptible */
+#define AFS_OPERATION_DOWNGRADE		0x0080	/* Set to retry with downgraded opcode */
+#define AFS_OPERATION_LOCK_0		0x0100	/* Set if have io_lock on file[0] */
+#define AFS_OPERATION_LOCK_1		0x0200	/* Set if have io_lock on file[1] */
+#define AFS_OPERATION_TRIED_ALL		0x0400	/* Set if we've tried all the fileservers */
+#define AFS_OPERATION_RETRY_SERVER	0x0800	/* Set if we should retry the current server */
 };
 
 /*
@@ -1055,7 +1057,9 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
 extern void afs_fileserver_probe_result(struct afs_call *);
 extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
 extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
 extern void afs_fs_probe_dispatcher(struct work_struct *);
+extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
 
 /*
  * inode.c
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index d1590fb382b6..bfa82f613c93 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -367,6 +367,7 @@ bool afs_select_fileserver(struct afs_operation *op)
 
 	_debug("USING SERVER: %pU", &server->uuid);
 
+	op->flags |= AFS_OPERATION_RETRY_SERVER;
 	op->server = server;
 	if (vnode->cb_server != server) {
 		vnode->cb_server = server;
@@ -381,6 +382,7 @@ bool afs_select_fileserver(struct afs_operation *op)
 	afs_get_addrlist(alist);
 	read_unlock(&server->fs_lock);
 
+retry_server:
 	memset(&op->ac, 0, sizeof(op->ac));
 
 	if (!op->ac.alist)
@@ -396,13 +398,36 @@ bool afs_select_fileserver(struct afs_operation *op)
 	 * address on which it will respond to us.
 	 */
 	if (!afs_iterate_addresses(&op->ac))
-		goto next_server;
+		goto out_of_addresses;
 
-	_debug("address [%u] %u/%u", op->index, op->ac.index, op->ac.alist->nr_addrs);
+	_debug("address [%u] %u/%u %pISp",
+	       op->index, op->ac.index, op->ac.alist->nr_addrs,
+	       &op->ac.alist->addrs[op->ac.index].transport);
 
 	_leave(" = t");
 	return true;
 
+out_of_addresses:
+	/* We've now had a failure to respond on all of a server's addresses -
+	 * immediately probe them again and consider retrying the server.
+	 */
+	afs_probe_fileserver(op->net, op->server);
+	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
+		alist = op->ac.alist;
+		error = afs_wait_for_one_fs_probe(
+			op->server, !(op->flags & AFS_OPERATION_UNINTR));
+		switch (error) {
+		case 0:
+			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+			goto retry_server;
+		case -ERESTARTSYS:
+			goto failed_set_error;
+		case -ETIME:
+		case -EDESTADDRREQ:
+			goto next_server;
+		}
+	}
+
 next_server:
 	_debug("next");
 	afs_end_cursor(&op->ac);


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ