[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8E06CE2C-E8FB-4727-9ABF-C1EA6AAC067E@oracle.com>
Date: Thu, 3 Nov 2022 15:42:56 +0000
From: Chuck Lever III <chuck.lever@...cle.com>
To: Jeff Layton <jlayton@...nel.org>
CC: Theodore Ts'o <tytso@....edu>,
"adilger.kernel@...ger.ca" <adilger.kernel@...ger.ca>,
"Darrick J. Wong" <djwong@...nel.org>,
"david@...morbit.com" <david@...morbit.com>,
"trondmy@...merspace.com" <trondmy@...merspace.com>,
Neil Brown <neilb@...e.de>, Al Viro <viro@...iv.linux.org.uk>,
"zohar@...ux.ibm.com" <zohar@...ux.ibm.com>,
"xiubli@...hat.com" <xiubli@...hat.com>,
"lczerner@...hat.com" <lczerner@...hat.com>,
"jack@...e.cz" <jack@...e.cz>, Bruce Fields <bfields@...ldses.org>,
"brauner@...nel.org" <brauner@...nel.org>,
"fweimer@...hat.com" <fweimer@...hat.com>,
"linux-btrfs@...r.kernel.org" <linux-btrfs@...r.kernel.org>,
"linux-fsdevel@...r.kernel.org" <linux-fsdevel@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"ceph-devel@...r.kernel.org" <ceph-devel@...r.kernel.org>,
"linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>,
Linux NFS Mailing List <linux-nfs@...r.kernel.org>,
"linux-xfs@...r.kernel.org" <linux-xfs@...r.kernel.org>
Subject: Re: [PATCH v7 7/9] nfsd: use the getattr operation to fetch i_version
> On Oct 17, 2022, at 6:57 AM, Jeff Layton <jlayton@...nel.org> wrote:
>
> Now that we can call into vfs_getattr to get the i_version field, use
> that facility to fetch it instead of doing it in nfsd4_change_attribute.
>
> Neil also pointed out recently that IS_I_VERSION directory operations
> are always logged, and so we only need to mitigate the rollback problem
> on regular files. Also, we don't need to factor in the ctime when
> reexporting NFS or Ceph.
>
> Set the STATX_VERSION (and BTIME) bits in the request when we're dealing
> with a v4 request. Then, instead of looking at IS_I_VERSION when
> generating the change attr, look at the result mask and only use it if
> STATX_VERSION is set.
>
> Change nfsd4_change_attribute to only factor in the ctime if it's a regular
> file and the fs doesn't advertise STATX_ATTR_VERSION_MONOTONIC.
>
> Reviewed-by: NeilBrown <neilb@...e.de>
> Signed-off-by: Jeff Layton <jlayton@...nel.org>
Acked-by: Chuck Lever <chuck.lever@...cle.com>
> ---
> fs/nfsd/nfs4xdr.c | 4 +++-
> fs/nfsd/nfsfh.c | 53 +++++++++++++++++++++++++++++++----------------
> fs/nfsd/vfs.h | 7 ++++++-
> 3 files changed, 44 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index bcfeb1a922c0..c19b6b00b620 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2906,7 +2906,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> goto out;
> }
>
> - err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
> + err = vfs_getattr(&path, &stat,
> + STATX_BASIC_STATS | STATX_BTIME | STATX_VERSION,
> + AT_STATX_SYNC_AS_STAT);
> if (err)
> goto out_nfserr;
> if (!(stat.result_mask & STATX_BTIME))
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index 7030d9209903..21b64ac97a06 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -628,6 +628,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
> stat.mtime = inode->i_mtime;
> stat.ctime = inode->i_ctime;
> stat.size = inode->i_size;
> + if (v4 && IS_I_VERSION(inode)) {
> + stat.version = inode_query_iversion(inode);
> + stat.result_mask |= STATX_VERSION;
> + }
> }
> if (v4)
> fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
> @@ -659,6 +663,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
> if (err) {
> fhp->fh_post_saved = false;
> fhp->fh_post_attr.ctime = inode->i_ctime;
> + if (v4 && IS_I_VERSION(inode)) {
> + fhp->fh_post_attr.version = inode_query_iversion(inode);
> + fhp->fh_post_attr.result_mask |= STATX_VERSION;
> + }
> } else
> fhp->fh_post_saved = true;
> if (v4)
> @@ -750,28 +758,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
> }
>
> /*
> - * We could use i_version alone as the change attribute. However,
> - * i_version can go backwards after a reboot. On its own that doesn't
> - * necessarily cause a problem, but if i_version goes backwards and then
> - * is incremented again it could reuse a value that was previously used
> - * before boot, and a client who queried the two values might
> - * incorrectly assume nothing changed.
> + * We could use i_version alone as the change attribute. However, i_version
> + * can go backwards on a regular file after an unclean shutdown. On its own
> + * that doesn't necessarily cause a problem, but if i_version goes backwards
> + * and then is incremented again it could reuse a value that was previously
> + * used before boot, and a client who queried the two values might incorrectly
> + * assume nothing changed.
> + *
> + * By using both ctime and the i_version counter we guarantee that as long as
> + * time doesn't go backwards we never reuse an old value. If the filesystem
> + * advertises STATX_ATTR_VERSION_MONOTONIC, then this mitigation is not needed.
> *
> - * By using both ctime and the i_version counter we guarantee that as
> - * long as time doesn't go backwards we never reuse an old value.
> + * We only need to do this for regular files as well. For directories, we
> + * assume that the new change attr is always logged to stable storage in some
> + * fashion before the results can be seen.
> */
> u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode)
> {
> + u64 chattr;
> +
> if (inode->i_sb->s_export_op->fetch_iversion)
> return inode->i_sb->s_export_op->fetch_iversion(inode);
> - else if (IS_I_VERSION(inode)) {
> - u64 chattr;
> -
> - chattr = stat->ctime.tv_sec;
> - chattr <<= 30;
> - chattr += stat->ctime.tv_nsec;
> - chattr += inode_query_iversion(inode);
> - return chattr;
> - } else
> - return time_to_chattr(&stat->ctime);
> + if (stat->result_mask & STATX_VERSION) {
> + chattr = stat->version;
> +
> + if (S_ISREG(inode->i_mode) &&
> + !(stat->attributes & STATX_ATTR_VERSION_MONOTONIC)) {
> + chattr += (u64)stat->ctime.tv_sec << 30;
> + chattr += stat->ctime.tv_nsec;
> + }
> + } else {
> + chattr = time_to_chattr(&stat->ctime);
> + }
> + return chattr;
> }
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index 120521bc7b24..c98e13ec37b2 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -168,9 +168,14 @@ static inline void fh_drop_write(struct svc_fh *fh)
>
> static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
> {
> + u32 request_mask = STATX_BASIC_STATS;
> struct path p = {.mnt = fh->fh_export->ex_path.mnt,
> .dentry = fh->fh_dentry};
> - return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
> +
> + if (fh->fh_maxsize == NFS4_FHSIZE)
> + request_mask |= (STATX_BTIME | STATX_VERSION);
> +
> + return nfserrno(vfs_getattr(&p, stat, request_mask,
> AT_STATX_SYNC_AS_STAT));
> }
>
> --
> 2.37.3
>
--
Chuck Lever
Powered by blists - more mailing lists