[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-id: <173889371961.22054.1232506757563289828@noble.neil.brown.name>
Date: Fri, 07 Feb 2025 13:01:59 +1100
From: "NeilBrown" <neilb@...e.de>
To: "Christian Brauner" <brauner@...nel.org>
Cc: "Alexander Viro" <viro@...iv.linux.org.uk>, "Jan Kara" <jack@...e.cz>,
"Linus Torvalds" <torvalds@...ux-foundation.org>,
"Jeff Layton" <jlayton@...nel.org>, "Dave Chinner" <david@...morbit.com>,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 10/19] VFS: introduce inode flags to report locking needs
for directory ops
On Fri, 07 Feb 2025, Christian Brauner wrote:
> On Thu, Feb 06, 2025 at 04:42:47PM +1100, NeilBrown wrote:
> > If a filesystem supports _async ops for some directory ops we can take a
> > "shared" lock on i_rwsem otherwise we must take an "exclusive" lock. As
> > the filesystem may support some async ops but not others we need to
> > easily determine which.
> >
> > With this patch we group the ops into 4 groups that are likely be
> > supported together:
> >
> > CREATE: create, link, mkdir, mknod
> > REMOVE: rmdir, unlink
> > RENAME: rename
> > OPEN: atomic_open, create
> >
> > and set S_ASYNC_XXX for each when the inode in initialised.
> >
> > We also add a LOOKUP_REMOVE intent flag which will be used by locking
> > interfaces to help know which group is being used.
> >
> > Signed-off-by: NeilBrown <neilb@...e.de>
> > ---
> > fs/dcache.c | 24 ++++++++++++++++++++++++
> > include/linux/fs.h | 5 +++++
> > include/linux/namei.h | 5 +++--
> > 3 files changed, 32 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/dcache.c b/fs/dcache.c
> > index e49607d00d2d..37c0f655166d 100644
> > --- a/fs/dcache.c
> > +++ b/fs/dcache.c
> > @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
> > smp_store_release(&dentry->d_flags, flags);
> > }
> >
> > +static void set_inode_flags(struct inode *inode)
> > +{
> > + const struct inode_operations *i_op = inode->i_op;
> > +
> > + lockdep_assert_held(&inode->i_lock);
> > + if ((i_op->create_async || !i_op->create) &&
> > + (i_op->link_async || !i_op->link) &&
> > + (i_op->symlink_async || !i_op->symlink) &&
> > + (i_op->mkdir_async || !i_op->mkdir) &&
> > + (i_op->mknod_async || !i_op->mknod))
> > + inode->i_flags |= S_ASYNC_CREATE;
> > + if ((i_op->unlink_async || !i_op->unlink) &&
> > + (i_op->mkdir_async || !i_op->mkdir))
> > + inode->i_flags |= S_ASYNC_REMOVE;
> > + if (i_op->rename_async)
> > + inode->i_flags |= S_ASYNC_RENAME;
> > + if (i_op->atomic_open_async ||
> > + (!i_op->atomic_open && i_op->create_async))
> > + inode->i_flags |= S_ASYNC_OPEN;
> > +}
>
> I think this is unpleasant. As I said we should fold _async into the
> normal methods. Then we can add:
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index be3ad155ec9f..1d19f72448fc 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2186,6 +2186,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *,
> { return wrap_directory_iterator(file, ctx, x); }
>
> struct inode_operations {
> + iop_flags_t iop_flags;
> struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
> const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
> int (*permission) (struct mnt_idmap *, struct inode *, int);
>
> which is similar to what I did for
>
> struct file_operations {
> struct module *owner;
> fop_flags_t fop_flags;
>
> and introduce
>
> IOP_ASYNC_CREATE
> IOP_ASYNC_OPEN
>
Ahh - I see where you are going. Interesting.
The iop_flags effectively provides versioning for the functions so we
don't have to embed the version in the name. That would work.
I guess we would handle the mkdir change by changing every current mkdir
to return ERR_PTR() of the current return value and the vfs_mkdir_xx
caller checks if that is NULL and the original dentry is still negative,
and then performs the lookup.
Thanks,
NeilBrown
> etc and then filesystems can just do:
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index df9669d4ded7..90c7aeb49466 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10859,6 +10859,7 @@ static void nfs4_disable_swap(struct inode *inode)
> }
>
> static const struct inode_operations nfs4_dir_inode_operations = {
> + .iop_flags = IOP_ASYNC_CREATE | IOP_ASYNC_OPEN,
> .create = nfs_create,
> .lookup = nfs_lookup,
> .atomic_open = nfs_atomic_open,
>
> and then you can raise S_ASYNC_OPEN and so on based on the flags, not
> the individual methods.
>
> > +
> > static inline void __d_clear_type_and_inode(struct dentry *dentry)
> > {
> > unsigned flags = READ_ONCE(dentry->d_flags);
> > @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
> > raw_write_seqcount_begin(&dentry->d_seq);
> > __d_set_inode_and_type(dentry, inode, add_flags);
> > raw_write_seqcount_end(&dentry->d_seq);
> > + set_inode_flags(inode);
> > fsnotify_update_flags(dentry);
> > spin_unlock(&dentry->d_lock);
> > }
> > @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
> >
> > spin_lock(&new->d_lock);
> > __d_set_inode_and_type(new, inode, add_flags);
> > + set_inode_flags(inode);
> > hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
> > if (!disconnected) {
> > hlist_bl_lock(&sb->s_roots);
> > @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
> > raw_write_seqcount_begin(&dentry->d_seq);
> > __d_set_inode_and_type(dentry, inode, add_flags);
> > raw_write_seqcount_end(&dentry->d_seq);
> > + set_inode_flags(inode);
> > fsnotify_update_flags(dentry);
> > }
> > __d_rehash(dentry);
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index e414400c2487..9a9282fef347 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -2361,6 +2361,11 @@ struct super_operations {
> > #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
> > #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
> >
> > +#define S_ASYNC_CREATE BIT(18) /* create, link, symlink, mkdir, mknod all _async */
> > +#define S_ASYNC_REMOVE BIT(19) /* unlink, mkdir both _async */
> > +#define S_ASYNC_RENAME BIT(20) /* rename_async supported */
> > +#define S_ASYNC_OPEN BIT(21) /* atomic_open_async or create_async supported */
> > +
> > /*
> > * Note that nosuid etc flags are inode-specific: setting some file-system
> > * flags just means all the inodes inherit those flags by default. It might be
> > diff --git a/include/linux/namei.h b/include/linux/namei.h
> > index 76c587a5ec3a..72e351640406 100644
> > --- a/include/linux/namei.h
> > +++ b/include/linux/namei.h
> > @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
> > #define LOOKUP_CREATE BIT(17) /* ... in object creation */
> > #define LOOKUP_EXCL BIT(18) /* ... in target must not exist */
> > #define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */
> > +#define LOOKUP_REMOVE BIT(20) /* ... in target of object removal */
> >
> > #define LOOKUP_INTENT_FLAGS (LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL | \
> > - LOOKUP_RENAME_TARGET)
> > -/* 4 spare bits for intent */
> > + LOOKUP_RENAME_TARGET | LOOKUP_REMOVE)
> > +/* 3 spare bits for intent */
> >
> > /* Scoping flags for lookup. */
> > #define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */
> > --
> > 2.47.1
> >
>
Powered by blists - more mailing lists