lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <75db462529291ee8330a2a8e32cc75df4b33f71a.camel@kernel.org>
Date: Tue, 09 Dec 2025 10:10:49 -0500
From: Trond Myklebust <trondmy@...nel.org>
To: Robert Milkowski <rmilkowski@...il.com>
Cc: anna@...nel.org, linux-nfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] nfs: pnfs: handle early layoutreturn failures gracefully

On Tue, 2025-12-09 at 14:53 +0000, Robert Milkowski wrote:
> pnfs_layoutreturn_before_put_layout_hdr() bumps the layout header refcount
> and sets NFS_LAYOUT_RETURN before prepare or rpc_run_task dispatch. If the
> layout driver fails prepare or rpc_run_task() fails to queue the call, we
> currently leak refs and leave waiters stuck on
> pnfs_prepare_to_retry_layoutget().
> 
> Mirror the normal completion path for these early failures: warn and
> schedule pnfs_layoutreturn_retry_later(), free any reserved slot, drop
> refs/creds/inode, and clear the wait bit.
> 
> Signed-off-by: Robert Milkowski <rmilkowski@...il.com>
> ---
>  fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++------------
>  fs/nfs/pnfs.c     | 21 +++++++++++++++++++--
>  2 files changed, 44 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 93c6ce04332b..6066a1c7227d 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10132,25 +10132,34 @@ static void nfs4_layoutreturn_done(struct
> rpc_task *task, void *calldata)
>  	rpc_restart_call_prepare(task);
>  }
>  
> -static void nfs4_layoutreturn_release(void *calldata)
> +static void nfs4_layoutreturn_cleanup(struct nfs4_layoutreturn *lrp,
> int status)
>  {
> -	struct nfs4_layoutreturn *lrp = calldata;
>  	struct pnfs_layout_hdr *lo = lrp->args.layout;
>  
> -	if (lrp->rpc_status == 0 || !lrp->inode)
> -		pnfs_layoutreturn_free_lsegs(
> -			lo, &lrp->args.stateid, &lrp->args.range,
> -			lrp->res.lrs_present ? &lrp->res.stateid :
> NULL);
> +	if (status == 0 || !lrp->inode)
> +		pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid,
> +					     &lrp->args.range,
> +					     lrp->res.lrs_present ?
> +					     &lrp->res.stateid :
> NULL);
>  	else
>  		pnfs_layoutreturn_retry_later(lo, &lrp-
> >args.stateid,
>  					      &lrp->args.range);
> -	nfs4_sequence_free_slot(&lrp->res.seq_res);
> +	if (lrp->res.seq_res.sr_slot)
> +		nfs4_sequence_free_slot(&lrp->res.seq_res);
>  	if (lrp->ld_private.ops && lrp->ld_private.ops->free)
>  		lrp->ld_private.ops->free(&lrp->ld_private);
> -	pnfs_put_layout_hdr(lrp->args.layout);
> -	nfs_iput_and_deactive(lrp->inode);
> +	pnfs_put_layout_hdr(lo);
> +	if (lrp->inode)
> +		nfs_iput_and_deactive(lrp->inode);
>  	put_cred(lrp->cred);
> -	kfree(calldata);
> +	kfree(lrp);
> +}
> +
> +static void nfs4_layoutreturn_release(void *calldata)
> +{
> +	struct nfs4_layoutreturn *lrp = calldata;
> +
> +	nfs4_layoutreturn_cleanup(lrp, lrp->rpc_status);
>  }
>  
>  static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
> @@ -10198,8 +10207,12 @@ int nfs4_proc_layoutreturn(struct
> nfs4_layoutreturn *lrp, unsigned int flags)
>  		nfs4_init_sequence(&lrp->args.seq_args, &lrp-
> >res.seq_res, 1,
>  				   0);
>  	task = rpc_run_task(&task_setup_data);
> -	if (IS_ERR(task))
> -		return PTR_ERR(task);
> +	if (IS_ERR(task)) {
> +		status = PTR_ERR(task);
> +		trace_nfs4_layoutreturn(lrp->args.inode, &lrp-
> >args.stateid, status);
> +		nfs4_layoutreturn_cleanup(lrp, status);
> +		return status;
> +	}

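NACK. The above introduces a use-after-free: when rpc_run_task() fails,
it has already invoked the rpc_release callback (here,
nfs4_layoutreturn_release()) on the calldata, so lrp has been freed by
the time this error path dereferences it. There is no need to call the
release routine after a call to rpc_run_task().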

>  	if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
>  		status = task->tk_status;
>  	trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid,
> status);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index f157d43d1312..a489f43344b8 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1370,13 +1370,30 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr
> *lo,
>  	lrp->args.ld_private = &lrp->ld_private;
>  	lrp->clp = NFS_SERVER(ino)->nfs_client;
>  	lrp->cred = cred;
> -	if (ld->prepare_layoutreturn)
> -		ld->prepare_layoutreturn(&lrp->args);
> +	if (ld->prepare_layoutreturn) {
> +		status = ld->prepare_layoutreturn(&lrp->args);
> +		if (status) {
> +			pr_warn_ratelimited("NFS: pNFS layoutreturn
> prepare failed (%d) for layout driver %s\n",
> +				status, ld->name ? ld->name :
> "unknown");
> +			goto out_prepare_fail;
> +		}
> +	}

This is also unnecessary. The existing code will cope just fine with
args->ld_private being unset.

>  
>  	status = nfs4_proc_layoutreturn(lrp, flags);
>  out:
>  	dprintk("<-- %s status: %d\n", __func__, status);
>  	return status;
> +
> +out_prepare_fail:
> +	pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, &lrp-
> >args.range);
> +	if (lrp->ld_private.ops && lrp->ld_private.ops->free)
> +		lrp->ld_private.ops->free(&lrp->ld_private);
> +	if (lrp->inode)
> +		nfs_iput_and_deactive(lrp->inode);
> +	put_cred(cred);
> +	kfree(lrp);
> +	pnfs_put_layout_hdr(lo);
> +	return status;
>  }
>  
>  /* Return true if layoutreturn is needed */
> 
> base-commit: cb015814f8b6eebcbb8e46e111d108892c5e6821

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trondmy@...nel.org, trond.myklebust@...merspace.com

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ