[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <75db462529291ee8330a2a8e32cc75df4b33f71a.camel@kernel.org>
Date: Tue, 09 Dec 2025 10:10:49 -0500
From: Trond Myklebust <trondmy@...nel.org>
To: Robert Milkowski <rmilkowski@...il.com>
Cc: anna@...nel.org, linux-nfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] nfs: pnfs: handle early layoutreturn failures gracefully
On Tue, 2025-12-09 at 14:53 +0000, Robert Milkowski wrote:
> pnfs_layoutreturn_before_put_layout_hdr() bumps the layout header
> refcount
> and sets NFS_LAYOUT_RETURN before prepare or rpc_run_task dispatch.
> If the
> layout driver fails prepare or rpc_run_task() fails to queue the
> call, we
> currently leak refs and leave waiters stuck on
> pnfs_prepare_to_retry_layoutget().
>
> Mirror the normal completion path for these early failures: warn and
> schedule pnfs_layoutreturn_retry_later(), free any reserved slot,
> drop
> refs/creds/inode, and clear the wait bit.
>
> Signed-off-by: Robert Milkowski <rmilkowski@...il.com>
> ---
> fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++------------
> fs/nfs/pnfs.c | 21 +++++++++++++++++++--
> 2 files changed, 44 insertions(+), 14 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 93c6ce04332b..6066a1c7227d 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10132,25 +10132,34 @@ static void nfs4_layoutreturn_done(struct
> rpc_task *task, void *calldata)
> rpc_restart_call_prepare(task);
> }
>
> -static void nfs4_layoutreturn_release(void *calldata)
> +static void nfs4_layoutreturn_cleanup(struct nfs4_layoutreturn *lrp,
> int status)
> {
> - struct nfs4_layoutreturn *lrp = calldata;
> struct pnfs_layout_hdr *lo = lrp->args.layout;
>
> - if (lrp->rpc_status == 0 || !lrp->inode)
> - pnfs_layoutreturn_free_lsegs(
> - lo, &lrp->args.stateid, &lrp->args.range,
> - lrp->res.lrs_present ? &lrp->res.stateid :
> NULL);
> + if (status == 0 || !lrp->inode)
> + pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid,
> + &lrp->args.range,
> + lrp->res.lrs_present ?
> + &lrp->res.stateid :
> NULL);
> else
> pnfs_layoutreturn_retry_later(lo, &lrp-
> >args.stateid,
> &lrp->args.range);
> - nfs4_sequence_free_slot(&lrp->res.seq_res);
> + if (lrp->res.seq_res.sr_slot)
> + nfs4_sequence_free_slot(&lrp->res.seq_res);
> if (lrp->ld_private.ops && lrp->ld_private.ops->free)
> lrp->ld_private.ops->free(&lrp->ld_private);
> - pnfs_put_layout_hdr(lrp->args.layout);
> - nfs_iput_and_deactive(lrp->inode);
> + pnfs_put_layout_hdr(lo);
> + if (lrp->inode)
> + nfs_iput_and_deactive(lrp->inode);
> put_cred(lrp->cred);
> - kfree(calldata);
> + kfree(lrp);
> +}
> +
> +static void nfs4_layoutreturn_release(void *calldata)
> +{
> + struct nfs4_layoutreturn *lrp = calldata;
> +
> + nfs4_layoutreturn_cleanup(lrp, lrp->rpc_status);
> }
>
> static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
> @@ -10198,8 +10207,12 @@ int nfs4_proc_layoutreturn(struct
> nfs4_layoutreturn *lrp, unsigned int flags)
> nfs4_init_sequence(&lrp->args.seq_args, &lrp-
> >res.seq_res, 1,
> 0);
> task = rpc_run_task(&task_setup_data);
> - if (IS_ERR(task))
> - return PTR_ERR(task);
> + if (IS_ERR(task)) {
> + status = PTR_ERR(task);
> + trace_nfs4_layoutreturn(lrp->args.inode, &lrp-
> >args.stateid, status);
> + nfs4_layoutreturn_cleanup(lrp, status);
> + return status;
> + }
NACK. The above introduces a use-after-free. There is no need to call
the release routine after a call to rpc_run_task().
> if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
> status = task->tk_status;
> trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid,
> status);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index f157d43d1312..a489f43344b8 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1370,13 +1370,30 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr
> *lo,
> lrp->args.ld_private = &lrp->ld_private;
> lrp->clp = NFS_SERVER(ino)->nfs_client;
> lrp->cred = cred;
> - if (ld->prepare_layoutreturn)
> - ld->prepare_layoutreturn(&lrp->args);
> + if (ld->prepare_layoutreturn) {
> + status = ld->prepare_layoutreturn(&lrp->args);
> + if (status) {
> + pr_warn_ratelimited("NFS: pNFS layoutreturn
> prepare failed (%d) for layout driver %s\n",
> + status, ld->name ? ld->name :
> "unknown");
> + goto out_prepare_fail;
> + }
> + }
This is also unnecessary. The existing code will cope just fine with
args->ld_private being unset.
>
> status = nfs4_proc_layoutreturn(lrp, flags);
> out:
> dprintk("<-- %s status: %d\n", __func__, status);
> return status;
> +
> +out_prepare_fail:
> + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, &lrp-
> >args.range);
> + if (lrp->ld_private.ops && lrp->ld_private.ops->free)
> + lrp->ld_private.ops->free(&lrp->ld_private);
> + if (lrp->inode)
> + nfs_iput_and_deactive(lrp->inode);
> + put_cred(cred);
> + kfree(lrp);
> + pnfs_put_layout_hdr(lo);
> + return status;
> }
>
> /* Return true if layoutreturn is needed */
>
> base-commit: cb015814f8b6eebcbb8e46e111d108892c5e6821
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trondmy@...nel.org, trond.myklebust@...merspace.com
Powered by blists - more mailing lists