lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7906109F-91D2-4ECF-B868-5519B56D2CEE@redhat.com>
Date: Wed, 12 Mar 2025 09:52:53 -0400
From: Benjamin Coddington <bcodding@...hat.com>
To: Jeff Layton <jlayton@...nel.org>
Cc: Trond Myklebust <trondmy@...nel.org>, Anna Schumaker <anna@...nel.org>,
 linux-nfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] sunrpc: add a rpc_clnt shutdown control in debugfs

On 12 Mar 2025, at 9:36, Jeff Layton wrote:

> There have been confirmed reports where a container with an NFS mount
> inside it dies abruptly, along with all of its processes, but the NFS
> client sticks around and keeps trying to send RPCs after the networking
> is gone.
>
> We have a reproducer where if we SIGKILL a container with an NFS mount,
> the RPC clients will stick around indefinitely. The orchestrator
> does a MNT_DETACH unmount on the NFS mount, and then tears down the
> networking while there are still RPCs in flight.
>
> Recently new controls were added[1] that allow shutting down an NFS
> mount. That doesn't help here since the mount namespace is detached from
> any tasks at this point.

That's interesting - seems like the orchestrator could just reorder its
request to shutdown before detaching the mount namespace.  Not an objection,
just wondering why the MNT_DETACH must come first.

> Transplant shutdown_client() to the sunrpc module, and give it a more
> distinct name. Add a new debugfs sunrpc/rpc_clnt/*/shutdown knob that
> allows the same functionality as the one in /sys/fs/nfs, but at the
> rpc_clnt level.
>
> [1]: commit d9615d166c7e ("NFS: add sysfs shutdown knob").
>
> Signed-off-by: Jeff Layton <jlayton@...nel.org>

I have a TODO to patch Documentation/ for this knob mostly to write warnings
because there are some potential "gotchas" here - for example you can have
shared RPC clients and shutting down one of those can cause problems for a
different mount (this is true today with the /sys/fs/nfs/[bdi]/shutdown
knob).  Shutting down aribitrary clients will definitely break things in
weird ways, its not a safe place to explore.

Reviewed-by: Benjamin Coddington <bcodding@...hat.com>

Ben

> ---
>  fs/nfs/sysfs.c               | 19 ++++---------------
>  include/linux/sunrpc/sched.h |  1 +
>  net/sunrpc/clnt.c            | 12 ++++++++++++
>  net/sunrpc/debugfs.c         | 15 +++++++++++++++
>  4 files changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
> index 7b59a40d40c061a41b0fbde91aa006314f02c1fb..c29c5fd639554461bdcd9ff612726194910d85b5 100644
> --- a/fs/nfs/sysfs.c
> +++ b/fs/nfs/sysfs.c
> @@ -217,17 +217,6 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns)
>  	}
>  }
>
> -static bool shutdown_match_client(const struct rpc_task *task, const void *data)
> -{
> -	return true;
> -}
> -
> -static void shutdown_client(struct rpc_clnt *clnt)
> -{
> -	clnt->cl_shutdown = 1;
> -	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
> -}
> -
>  static ssize_t
>  shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
>  				char *buf)
> @@ -258,14 +247,14 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
>  		goto out;
>
>  	server->flags |= NFS_MOUNT_SHUTDOWN;
> -	shutdown_client(server->client);
> -	shutdown_client(server->nfs_client->cl_rpcclient);
> +	rpc_clnt_shutdown(server->client);
> +	rpc_clnt_shutdown(server->nfs_client->cl_rpcclient);
>
>  	if (!IS_ERR(server->client_acl))
> -		shutdown_client(server->client_acl);
> +		rpc_clnt_shutdown(server->client_acl);
>
>  	if (server->nlm_host)
> -		shutdown_client(server->nlm_host->h_rpcclnt);
> +		rpc_clnt_shutdown(server->nlm_host->h_rpcclnt);
>  out:
>  	return count;
>  }
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index eac57914dcf3200c1a6ed39ab030e3fe8b4da3e1..fe7c39a17ce44ec68c0cf057133d0f8e7f0ae797 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -232,6 +232,7 @@ unsigned long	rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
>  				 bool (*fnmatch)(const struct rpc_task *,
>  						 const void *),
>  				 const void *data);
> +void		rpc_clnt_shutdown(struct rpc_clnt *clnt);
>  void		rpc_execute(struct rpc_task *);
>  void		rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
>  void		rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 2fe88ea79a70c134e58abfb03fca230883eddf1f..0028858b12d97e7b45f4c24cfbd761ba2a734b32 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -934,6 +934,18 @@ unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
>  }
>  EXPORT_SYMBOL_GPL(rpc_cancel_tasks);
>
> +static bool shutdown_match_client(const struct rpc_task *task, const void *data)
> +{
> +	return true;
> +}
> +
> +void rpc_clnt_shutdown(struct rpc_clnt *clnt)
> +{
> +	clnt->cl_shutdown = 1;
> +	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
> +}
> +EXPORT_SYMBOL_GPL(rpc_clnt_shutdown);
> +
>  static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt,
>  				    struct rpc_xprt *xprt, void *dummy)
>  {
> diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
> index 32417db340de3775c533d0ad683b5b37800d7fe5..4df31dcca2d747db6767c12ddfa29963ed7be204 100644
> --- a/net/sunrpc/debugfs.c
> +++ b/net/sunrpc/debugfs.c
> @@ -145,6 +145,17 @@ static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *n
>  	return 0;
>  }
>
> +static int
> +clnt_shutdown(void *data, u64 value)
> +{
> +	struct rpc_clnt *clnt = data;
> +
> +	rpc_clnt_shutdown(clnt);
> +	return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(shutdown_fops, NULL, clnt_shutdown, "%llu\n");
> +
>  void
>  rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
>  {
> @@ -163,6 +174,10 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
>  	debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
>  			    &tasks_fops);
>
> +	/* make shutdown file */
> +	debugfs_create_file("shutdown", S_IFREG | 0200, clnt->cl_debugfs, clnt,
> +			    &shutdown_fops);
> +
>  	rpc_clnt_iterate_for_each_xprt(clnt, do_xprt_debugfs, &xprtnum);
>  }
>
>
> ---
> base-commit: 80e54e84911a923c40d7bee33a34c1b4be148d7a
> change-id: 20250312-rpc-shutdown-ce9b6d3599da
>
> Best regards,
> -- 
> Jeff Layton <jlayton@...nel.org>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ