[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20241221012128.307-1-hdanton@sina.com>
Date: Sat, 21 Dec 2024 09:21:20 +0800
From: Hillf Danton <hdanton@...a.com>
To: NeilBrown <neilb@...e.de>
Cc: Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>,
Jan Kara <jack@...e.cz>,
Peter Zijlstra <peterz@...radead.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 08/11] VFS: add inode_dir_lock/unlock
On Fri, 20 Dec 2024 13:54:26 +1100 NeilBrown <neilb@...e.de>
> During the transition from providing exclusive locking on the directory
> for directory-modifying operations to providing exclusive locking only on
> the dentry with a shared lock on the directory - we need an alternate
> way to provide exclusion on the directory for file systems which haven't
> been converted. This is provided by inode_dir_lock() and
> inode_dir_unlock().
> This uses a bit in i_state for locking, and wait_var_event_spinlock() for
> waiting.
>
Inventing anything like a mutex sounds bad.
> Signed-off-by: NeilBrown <neilb@...e.de>
> ---
> fs/inode.c | 3 ++
> fs/namei.c | 81 +++++++++++++++++++++++++++++++++++++---------
> include/linux/fs.h | 5 +++
> 3 files changed, 74 insertions(+), 15 deletions(-)
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 6b4c77268fc0..9ba69837aa56 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -492,6 +492,8 @@ EXPORT_SYMBOL(address_space_init_once);
> */
> void inode_init_once(struct inode *inode)
> {
> + static struct lock_class_key __key;
> +
> memset(inode, 0, sizeof(*inode));
> INIT_HLIST_NODE(&inode->i_hash);
> INIT_LIST_HEAD(&inode->i_devices);
> @@ -501,6 +503,7 @@ void inode_init_once(struct inode *inode)
> INIT_LIST_HEAD(&inode->i_sb_list);
> __address_space_init_once(&inode->i_data);
> i_size_ordered_init(inode);
> + lockdep_init_map(&inode->i_dirlock_map, "I_DIR_LOCKED", &__key, 0);
> }
> EXPORT_SYMBOL(inode_init_once);
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 371c80902c59..68750b15dbf4 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3364,6 +3364,34 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
> return mode;
> }
>
> +static bool check_dir_locked(struct inode *dir)
> +{
> + if (dir->i_state & I_DIR_LOCKED) {
> + dir->i_state |= I_DIR_LOCK_WAITER;
> + return true;
> + }
> + return false;
> +}
> +
> +static void inode_lock_dir(struct inode *dir)
> +{
> + lock_acquire_exclusive(&dir->i_dirlock_map, 0, 0, NULL, _THIS_IP_);
> + spin_lock(&dir->i_lock);
> + wait_var_event_spinlock(dir, !check_dir_locked(dir),
> + &dir->i_lock);
> + dir->i_state |= I_DIR_LOCKED;
> + spin_unlock(&dir->i_lock);
> +}
> +
> +static void inode_unlock_dir(struct inode *dir)
> +{
> + lock_map_release(&dir->i_dirlock_map);
> + spin_lock(&dir->i_lock);
> + dir->i_state &= ~(I_DIR_LOCKED | I_DIR_LOCK_WAITER);
> + wake_up_var_locked(dir, &dir->i_lock);
> + spin_unlock(&dir->i_lock);
> +}
> +
> /**
> * vfs_create - create new file
> * @idmap: idmap of the mount the inode was found from
> @@ -3396,10 +3424,13 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
> error = security_inode_create(dir, dentry, mode);
> if (error)
> return error;
> - if (dir->i_op->create_shared)
> + if (dir->i_op->create_shared) {
> error = dir->i_op->create_shared(idmap, dir, dentry, mode, want_excl);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
> + inode_unlock_dir(dir);
> + }
> if (!error)
> fsnotify_create(dir, dentry);
> return error;
> @@ -3699,16 +3730,19 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
> file->f_mode |= FMODE_CREATED;
> audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
>
> - if (dir_inode->i_op->create_shared)
> + if (dir_inode->i_op->create_shared) {
> error = dir_inode->i_op->create_shared(idmap, dir_inode,
> dentry, mode,
> open_flag & O_EXCL);
> - else if (dir_inode->i_op->create)
> + } else if (dir_inode->i_op->create) {
> + inode_lock_dir(dir_inode);
> error = dir_inode->i_op->create(idmap, dir_inode,
> dentry, mode,
> open_flag & O_EXCL);
> - else
> + inode_unlock_dir(dir_inode);
> + } else {
> error = -EACCES;
> + }
> if (error)
> goto out_dput;
> }
> @@ -4227,10 +4261,13 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
> if (error)
> return error;
>
> - if (dir->i_op->mknod_shared)
> + if (dir->i_op->mknod_shared) {
> error = dir->i_op->mknod_shared(idmap, dir, dentry, mode, dev);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
> + inode_unlock_dir(dir);
> + }
> if (!error)
> fsnotify_create(dir, dentry);
> return error;
> @@ -4360,7 +4397,9 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> else if (de)
> dput(de);
> } else {
> + inode_lock_dir(dir);
> error = dir->i_op->mkdir(idmap, dir, dentry, mode);
> + inode_unlock_dir(dir);
> }
> if (!error)
> fsnotify_mkdir(dir, dentry);
> @@ -4521,10 +4560,13 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
> if (error)
> goto out;
>
> - if (dir->i_op->rmdir_shared)
> + if (dir->i_op->rmdir_shared) {
> error = dir->i_op->rmdir_shared(dir, dentry);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->rmdir(dir, dentry);
> + inode_unlock_dir(dir);
> + }
> if (error)
> goto out;
>
> @@ -4648,10 +4690,13 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
> error = try_break_deleg(target, delegated_inode);
> if (error)
> goto out;
> - if (dir->i_op->unlink_shared)
> + if (dir->i_op->unlink_shared) {
> error = dir->i_op->unlink_shared(dir, dentry);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->unlink(dir, dentry);
> + inode_unlock_dir(dir);
> + }
> if (!error) {
> dont_mount(dentry);
> detach_mounts(dentry);
> @@ -4792,10 +4837,13 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
> if (error)
> return error;
>
> - if (dir->i_op->symlink_shared)
> + if (dir->i_op->symlink_shared) {
> error = dir->i_op->symlink_shared(idmap, dir, dentry, oldname);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->symlink(idmap, dir, dentry, oldname);
> + inode_unlock_dir(dir);
> + }
> if (!error)
> fsnotify_create(dir, dentry);
> return error;
> @@ -4920,10 +4968,13 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
> error = try_break_deleg(inode, delegated_inode);
> if (error)
> ;
> - else if (dir->i_op->link_shared)
> + else if (dir->i_op->link_shared) {
> error = dir->i_op->link_shared(old_dentry, dir, new_dentry);
> - else
> + } else {
> + inode_lock_dir(dir);
> error = dir->i_op->link(old_dentry, dir, new_dentry);
> + inode_unlock_dir(dir);
> + }
> }
>
> if (!error && (inode->i_state & I_LINKABLE)) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 68eba181175b..3ca92a54f28e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -722,6 +722,8 @@ struct inode {
> void (*free_inode)(struct inode *);
> };
> struct file_lock_context *i_flctx;
> +
> + struct lockdep_map i_dirlock_map; /* For tracking I_DIR_LOCKED locks */
The cost of this map argues against any attempt to invent a mutex in any form.
Powered by blists - more mailing lists