[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5d9bbb599569ce29f16e4e0eef6b291eda0f375b.camel@kernel.org>
Date: Tue, 12 Dec 2023 07:07:37 -0500
From: Jeff Layton <jlayton@...nel.org>
To: Lorenzo Bianconi <lorenzo@...nel.org>, linux-nfs@...r.kernel.org
Cc: lorenzo.bianconi@...hat.com, chuck.lever@...cle.com, neilb@...e.de,
netdev@...r.kernel.org
Subject: Re: [PATCH v8 3/3] NFSD: add rpc_status netlink support
On Mon, 2023-12-11 at 13:56 -0500, Jeff Layton wrote:
> On Mon, 2023-09-11 at 14:49 +0200, Lorenzo Bianconi wrote:
> > Introduce rpc_status netlink support for NFSD in order to dump pending
> > RPC requests debugging information from userspace.
> >
> > Tested-by: Jeff Layton <jlayton@...nel.org>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
> > ---
> > fs/nfsd/nfsctl.c | 192 ++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/nfsd.h | 16 ++++
> > fs/nfsd/nfssvc.c | 15 +++
> > fs/nfsd/state.h | 2 -
> > include/linux/sunrpc/svc.h | 1 +
> > 5 files changed, 222 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index 1be66088849c..b862a759ea15 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -26,6 +26,7 @@
> > #include "pnfs.h"
> > #include "filecache.h"
> > #include "trace.h"
> > +#include "nfs_netlink_gen.h"
> >
> > /*
> > * We have a single directory with several nodes in it.
> > @@ -1497,17 +1498,199 @@ unsigned int nfsd_net_id;
> >
> > int nfsd_server_nl_rpc_status_get_start(struct netlink_callback *cb)
> > {
> > - return 0;
> > + struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id);
> > + int ret = -ENODEV;
> > +
> > + mutex_lock(&nfsd_mutex);
> > + if (nn->nfsd_serv) {
> > + svc_get(nn->nfsd_serv);
> > + ret = 0;
> > + }
> > + mutex_unlock(&nfsd_mutex);
> > +
> > + return ret;
> > }
>
> I think there is a potential race above. Once you've dropped the
> nfsd_mutex, there is no guarantee that the nn->nfsd_serv will still be
> set when you come back to put the serv. That means that we could oops
> when we hit the _done method below.
>
> Is it possible to stash a pointer to the serv while we hold the
> reference?
>
Actually, it looks like Neil may have already fixed this in the series
he sent on Oct 29th. See:
[PATCH 3/5] nfsd: hold nfsd_mutex across entire netlink operation
Another reason to go ahead and get that series in...
> >
> > -int nfsd_server_nl_rpc_status_get_done(struct netlink_callback *cb)
> > +static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
> > + struct netlink_callback *cb,
> > + struct nfsd_genl_rqstp *rqstp)
> > {
> > + void *hdr;
> > + int i;
> > +
> > + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> > + &nfsd_server_nl_family, NLM_F_MULTI,
> > + NFSD_CMD_RPC_STATUS_GET);
> > + if (!hdr)
> > + return -ENOBUFS;
> > +
> > + if (nla_put_be32(skb, NFSD_ATTR_RPC_STATUS_XID, rqstp->rq_xid) ||
> > + nla_put_u32(skb, NFSD_ATTR_RPC_STATUS_FLAGS, rqstp->rq_flags) ||
> > + nla_put_u32(skb, NFSD_ATTR_RPC_STATUS_PROG, rqstp->rq_prog) ||
> > + nla_put_u32(skb, NFSD_ATTR_RPC_STATUS_PROC, rqstp->rq_proc) ||
> > + nla_put_u8(skb, NFSD_ATTR_RPC_STATUS_VERSION, rqstp->rq_vers) ||
> > + nla_put_s64(skb, NFSD_ATTR_RPC_STATUS_SERVICE_TIME,
> > + ktime_to_us(rqstp->rq_stime),
> > + NFSD_ATTR_RPC_STATUS_PAD))
> > + return -ENOBUFS;
> > +
> > + switch (rqstp->saddr.sa_family) {
> > + case AF_INET: {
> > + const struct sockaddr_in *s_in, *d_in;
> > +
> > + s_in = (const struct sockaddr_in *)&rqstp->saddr;
> > + d_in = (const struct sockaddr_in *)&rqstp->daddr;
> > + if (nla_put_in_addr(skb, NFSD_ATTR_RPC_STATUS_SADDR4,
> > + s_in->sin_addr.s_addr) ||
> > + nla_put_in_addr(skb, NFSD_ATTR_RPC_STATUS_DADDR4,
> > + d_in->sin_addr.s_addr) ||
> > + nla_put_be16(skb, NFSD_ATTR_RPC_STATUS_SPORT,
> > + s_in->sin_port) ||
> > + nla_put_be16(skb, NFSD_ATTR_RPC_STATUS_DPORT,
> > + d_in->sin_port))
> > + return -ENOBUFS;
> > + break;
> > + }
> > + case AF_INET6: {
> > + const struct sockaddr_in6 *s_in, *d_in;
> > +
> > + s_in = (const struct sockaddr_in6 *)&rqstp->saddr;
> > + d_in = (const struct sockaddr_in6 *)&rqstp->daddr;
> > + if (nla_put_in6_addr(skb, NFSD_ATTR_RPC_STATUS_SADDR6,
> > + &s_in->sin6_addr) ||
> > + nla_put_in6_addr(skb, NFSD_ATTR_RPC_STATUS_DADDR6,
> > + &d_in->sin6_addr) ||
> > + nla_put_be16(skb, NFSD_ATTR_RPC_STATUS_SPORT,
> > + s_in->sin6_port) ||
> > + nla_put_be16(skb, NFSD_ATTR_RPC_STATUS_DPORT,
> > + d_in->sin6_port))
> > + return -ENOBUFS;
> > + break;
> > + }
> > + default:
> > + break;
> > + }
> > +
> > + if (rqstp->opcnt) {
> > + struct nlattr *attr;
> > +
> > + attr = nla_nest_start(skb, NFSD_ATTR_RPC_STATUS_COMPOND_OP);
> > + if (!attr)
> > + return -ENOBUFS;
> > +
> > + for (i = 0; i < rqstp->opcnt; i++) {
> > + struct nlattr *op_attr;
> > +
> > + op_attr = nla_nest_start(skb, i);
> > + if (!op_attr)
> > + return -ENOBUFS;
> > +
> > + if (nla_put_u32(skb, NFSD_ATTR_RPC_STATUS_COMP_OP,
> > + rqstp->opnum[i]))
> > + return -ENOBUFS;
> > +
> > + nla_nest_end(skb, op_attr);
> > + }
> > +
> > + nla_nest_end(skb, attr);
> > + }
> > +
> > + genlmsg_end(skb, hdr);
> > +
> > return 0;
> > }
> >
> > int nfsd_server_nl_rpc_status_get_dumpit(struct sk_buff *skb,
> > struct netlink_callback *cb)
> > {
> > + struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id);
> > + int i, ret, rqstp_index;
> > +
> > + rcu_read_lock();
> > +
> > + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> > + struct svc_rqst *rqstp;
> > +
> > + if (i < cb->args[0]) /* already consumed */
> > + continue;
> > +
> > + rqstp_index = 0;
> > + list_for_each_entry_rcu(rqstp,
> > + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
> > + rq_all) {
> > + struct nfsd_genl_rqstp genl_rqstp;
> > + unsigned int status_counter;
> > +
> > + if (rqstp_index++ < cb->args[1]) /* already consumed */
> > + continue;
> > + /*
> > + * Acquire rq_status_counter before parsing the rqst
> > + * fields. rq_status_counter is set to an odd value in
> > + * order to notify the consumers the rqstp fields are
> > + * meaningful.
> > + */
> > + status_counter =
> > + smp_load_acquire(&rqstp->rq_status_counter);
> > + if (!(status_counter & 1))
> > + continue;
> > +
> > + genl_rqstp.rq_xid = rqstp->rq_xid;
> > + genl_rqstp.rq_flags = rqstp->rq_flags;
> > + genl_rqstp.rq_vers = rqstp->rq_vers;
> > + genl_rqstp.rq_prog = rqstp->rq_prog;
> > + genl_rqstp.rq_proc = rqstp->rq_proc;
> > + genl_rqstp.rq_stime = rqstp->rq_stime;
> > + genl_rqstp.opcnt = 0;
> > + memcpy(&genl_rqstp.daddr, svc_daddr(rqstp),
> > + sizeof(struct sockaddr));
> > + memcpy(&genl_rqstp.saddr, svc_addr(rqstp),
> > + sizeof(struct sockaddr));
> > +
> > +#ifdef CONFIG_NFSD_V4
> > + if (rqstp->rq_vers == NFS4_VERSION &&
> > + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> > + /* NFSv4 compound */
> > + struct nfsd4_compoundargs *args;
> > + int j;
> > +
> > + args = rqstp->rq_argp;
> > + genl_rqstp.opcnt = args->opcnt;
> > + for (j = 0; j < genl_rqstp.opcnt; j++)
> > + genl_rqstp.opnum[j] =
> > + args->ops[j].opnum;
> > + }
> > +#endif /* CONFIG_NFSD_V4 */
> > +
> > + /*
> > + * Acquire rq_status_counter before reporting the rqst
> > + * fields to the user.
> > + */
> > + if (smp_load_acquire(&rqstp->rq_status_counter) !=
> > + status_counter)
> > + continue;
> > +
> > + ret = nfsd_genl_rpc_status_compose_msg(skb, cb,
> > + &genl_rqstp);
> > + if (ret)
> > + goto out;
> > + }
> > + }
> > +
> > + cb->args[0] = i;
> > + cb->args[1] = rqstp_index;
> > + ret = skb->len;
> > +out:
> > + rcu_read_unlock();
> > +
> > + return ret;
> > +}
> > +
> > +int nfsd_server_nl_rpc_status_get_done(struct netlink_callback *cb)
> > +{
> > + mutex_lock(&nfsd_mutex);
> > + nfsd_put(sock_net(cb->skb->sk));
> > + mutex_unlock(&nfsd_mutex);
> > +
> > return 0;
> > }
> >
>
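> I think there is a potential race above. Once you've dropped the
> nfsd_mutex, there is no guarantee that nn->nfsd_serv will still be
> set when this _done method comes back to put the serv.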
>
>
> > @@ -1605,6 +1788,10 @@ static int __init init_nfsd(void)
> > retval = register_filesystem(&nfsd_fs_type);
> > if (retval)
> > goto out_free_all;
> > + retval = genl_register_family(&nfsd_server_nl_family);
> > + if (retval)
> > + goto out_free_all;
> > +
> > return 0;
> > out_free_all:
> > nfsd4_destroy_laundry_wq();
> > @@ -1629,6 +1816,7 @@ static int __init init_nfsd(void)
> >
> > static void __exit exit_nfsd(void)
> > {
> > + genl_unregister_family(&nfsd_server_nl_family);
> > unregister_filesystem(&nfsd_fs_type);
> > nfsd4_destroy_laundry_wq();
> > unregister_cld_notifier();
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index 11c14faa6c67..d787bd38c053 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -62,6 +62,22 @@ struct readdir_cd {
> > __be32 err; /* 0, nfserr, or nfserr_eof */
> > };
> >
> > +/* Maximum number of operations per session compound */
> > +#define NFSD_MAX_OPS_PER_COMPOUND 50
> > +
> > +struct nfsd_genl_rqstp {
> > + struct sockaddr daddr;
> > + struct sockaddr saddr;
> > + unsigned long rq_flags;
> > + ktime_t rq_stime;
> > + __be32 rq_xid;
> > + u32 rq_vers;
> > + u32 rq_prog;
> > + u32 rq_proc;
> > + /* NFSv4 compound */
> > + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> > + u16 opcnt;
> > +};
> >
> > extern struct svc_program nfsd_program;
> > extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 1582af33e204..fad34a7325b3 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> > if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> > goto out_decode_err;
> >
> > + /*
> > + * Release rq_status_counter setting it to an odd value after the rpc
> > + * request has been properly parsed. rq_status_counter is used to
> > + * notify the consumers if the rqstp fields are stable
> > + * (rq_status_counter is odd) or not meaningful (rq_status_counter
> > + * is even).
> > + */
> > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> > +
> > rp = NULL;
> > switch (nfsd_cache_lookup(rqstp, &rp)) {
> > case RC_DOIT:
> > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> > if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> > goto out_encode_err;
> >
> > + /*
> > + * Release rq_status_counter setting it to an even value after the rpc
> > + * request has been properly processed.
> > + */
> > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> > +
> > nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> > out_cached_reply:
> > return 1;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index cbddcf484dba..41bdc913fa71 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -174,8 +174,6 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
> >
> > /* Maximum number of slots per session. 160 is useful for long haul TCP */
> > #define NFSD_MAX_SLOTS_PER_SESSION 160
> > -/* Maximum number of operations per session compound */
> > -#define NFSD_MAX_OPS_PER_COMPOUND 50
> > /* Maximum session per slot cache size */
> > #define NFSD_SLOT_CACHE_SIZE 2048
> > /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index dbf5b21feafe..caa20defd255 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -251,6 +251,7 @@ struct svc_rqst {
> > * net namespace
> > */
> > void ** rq_lease_breaker; /* The v4 client breaking a lease */
> > + unsigned int rq_status_counter; /* RPC processing counter */
> > };
> >
> > /* bits for rq_flags */
>
--
Jeff Layton <jlayton@...nel.org>
Powered by blists - more mailing lists