[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20240806163256.882140-1-mjguzik@gmail.com>
Date: Tue, 6 Aug 2024 18:32:56 +0200
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org,
viro@...iv.linux.org.uk
Cc: jack@...e.cz,
linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH v2] vfs: avoid spurious dentry ref/unref cycle on open
Opening a file grabs a reference on the terminal dentry in
__legitimize_path(), then another one in do_dentry_open() and finally
drops the initial reference in terminate_walk().
That's 2 modifications which don't need to be there -- do_dentry_open can
consume the already held reference instead.
In order to facilitate some debugging a dedicated namei state flag was
added to denote this happened.
When benchmarking on a 20-core vm using will-it-scale's open3_processes
("Same file open/close"), the results are (ops/s):
before: 3087010
after: 4173977 (+35%)
Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---
- drop the debug flag
- tweak commentary
- make vfs_open_consume clean up the path
perhaps this will do the trick? :)
fs/internal.h | 3 ++-
fs/namei.c | 15 ++++++++++++---
fs/open.c | 44 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 55 insertions(+), 7 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index cdd73209eecb..6899e6ec9394 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -193,7 +193,8 @@ int chmod_common(const struct path *path, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
int chown_common(const struct path *path, uid_t user, gid_t group);
-extern int vfs_open(const struct path *, struct file *);
+int vfs_open_consume(struct path *, struct file *);
+int vfs_open(const struct path *, struct file *);
/*
* inode.c
diff --git a/fs/namei.c b/fs/namei.c
index 1bf081959066..44ea5353ae26 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3683,6 +3683,7 @@ static const char *open_last_lookups(struct nameidata *nd,
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
+ struct vfsmount *mnt;
struct mnt_idmap *idmap;
int open_flag = op->open_flag;
bool do_truncate;
@@ -3720,11 +3721,17 @@ static int do_open(struct nameidata *nd,
error = mnt_want_write(nd->path.mnt);
if (error)
return error;
+ /*
+ * We grab an additional reference here because after the call to
+ * vfs_open_consume() we no longer own the reference in nd->path.mnt
+ * while we need to undo write access below.
+ */
+ mnt = mntget(nd->path.mnt);
do_truncate = true;
}
error = may_open(idmap, &nd->path, acc_mode, open_flag);
if (!error && !(file->f_mode & FMODE_OPENED))
- error = vfs_open(&nd->path, file);
+ error = vfs_open_consume(&nd->path, file);
if (!error)
error = security_file_post_open(file, op->acc_mode);
if (!error && do_truncate)
@@ -3733,8 +3740,10 @@ static int do_open(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
- if (do_truncate)
- mnt_drop_write(nd->path.mnt);
+ if (do_truncate) {
+ mnt_drop_write(mnt);
+ mntput(mnt);
+ }
return error;
}
diff --git a/fs/open.c b/fs/open.c
index 22adbef7ecc2..2fdfb3133d0f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -905,6 +905,15 @@ static inline int file_get_write_access(struct file *f)
return error;
}
+/*
+ * Populate struct file
+ *
+ * NOTE: it assumes f_path is populated and consumes the caller's reference.
+ *
+ * The caller must not path_put on it regardless of the error code -- the
+ * routine will either clean it up on its own or rely on fput, which must
+ * be issued anyway.
+ */
static int do_dentry_open(struct file *f,
int (*open)(struct inode *, struct file *))
{
@@ -912,7 +921,6 @@ static int do_dentry_open(struct file *f,
struct inode *inode = f->f_path.dentry->d_inode;
int error;
- path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
@@ -1045,6 +1053,7 @@ int finish_open(struct file *file, struct dentry *dentry,
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
+ path_get(&file->f_path);
return do_dentry_open(file, open);
}
EXPORT_SYMBOL(finish_open);
@@ -1077,15 +1086,22 @@ char *file_path(struct file *filp, char *buf, int buflen)
EXPORT_SYMBOL(file_path);
/**
- * vfs_open - open the file at the given path
+ * vfs_open_consume - open the file at the given path and consume the reference
* @path: path to open
* @file: newly allocated file with f_flag initialized
*/
-int vfs_open(const struct path *path, struct file *file)
+int vfs_open_consume(struct path *path, struct file *file)
{
int ret;
file->f_path = *path;
+ path->mnt = NULL;
+ path->dentry = NULL;
+
+ /*
+ * do_dentry_open() does the reference consuming regardless of its
+ * return value
+ */
ret = do_dentry_open(file, NULL);
if (!ret) {
/*
@@ -1098,6 +1114,27 @@ int vfs_open(const struct path *path, struct file *file)
return ret;
}
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @file: newly allocated file with f_flag initialized
+ *
+ * See commentary in vfs_open_consume. The difference here is that this routine
+ * grabs its own reference and does not clean up the passed path.
+ */
+int vfs_open(const struct path *path, struct file *file)
+{
+ int ret;
+
+ file->f_path = *path;
+ path_get(&file->f_path);
+ ret = do_dentry_open(file, NULL);
+ if (!ret) {
+ fsnotify_open(file);
+ }
+ return ret;
+}
+
struct file *dentry_open(const struct path *path, int flags,
const struct cred *cred)
{
@@ -1183,6 +1220,7 @@ struct file *kernel_file_open(const struct path *path, int flags,
return f;
f->f_path = *path;
+ path_get(&f->f_path);
error = do_dentry_open(f, NULL);
if (error) {
fput(f);
--
2.43.0
Powered by blists - more mailing lists