lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20250304165728.491785-1-mjguzik@gmail.com>
Date: Tue,  4 Mar 2025 17:57:28 +0100
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org,
	viro@...iv.linux.org.uk
Cc: jack@...e.cz,
	linux-kernel@...r.kernel.org,
	linux-fsdevel@...r.kernel.org,
	Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH v3] vfs: avoid spurious dentry ref/unref cycle on open

Opening a file grabs a reference on the terminal dentry in
__legitimize_path(), then another one in do_dentry_open() and finally
drops the initial reference in terminate_walk().

That's 2 modifications which don't need to be there -- do_dentry_open()
can consume the already held reference instead.

When benchmarking on a 20-core vm using will-it-scale's open3_processes
("Same file open/close"), the results are (ops/s):
before: 3087010
after:  4173977 (+35%)

Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---

Al, while I originally brought this up you objected to my patch, which I
then rewrote to the form below. You wrote your own more invovled
variant, but the effort to get this in seem stalled:
https://lore.kernel.org/linux-fsdevel/20240822003359.GO504335@ZenIV/

(I think there was a bigger git branch somewhere, but now I can't find
it.)

The lockref thing is increasingly getting in the way of some other stuff
and is just overhead which is not need to be there.

If you don't have time to finish your patches, how about the variant
below? This is rebased v2 I sent a while back and which got no feedback.

I am indifferent as to what lands, as long as the extra ref cycle is
eliminated.

 fs/internal.h |  3 ++-
 fs/namei.c    | 15 ++++++++++++---
 fs/open.c     | 44 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 3d05a989e4fa..1d0eb25a7598 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -193,7 +193,8 @@ int chmod_common(const struct path *path, umode_t mode);
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 int chown_common(const struct path *path, uid_t user, gid_t group);
-extern int vfs_open(const struct path *, struct file *);
+int vfs_open_consume(struct path *, struct file *);
+int vfs_open(const struct path *, struct file *);
 
 /*
  * inode.c
diff --git a/fs/namei.c b/fs/namei.c
index d00443e38d3a..8ce8e6038346 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3796,6 +3796,7 @@ static const char *open_last_lookups(struct nameidata *nd,
 static int do_open(struct nameidata *nd,
 		   struct file *file, const struct open_flags *op)
 {
+	struct vfsmount *mnt;
 	struct mnt_idmap *idmap;
 	int open_flag = op->open_flag;
 	bool do_truncate;
@@ -3833,11 +3834,17 @@ static int do_open(struct nameidata *nd,
 		error = mnt_want_write(nd->path.mnt);
 		if (error)
 			return error;
+		/*
+		 * We grab an additional reference here because after the call to
+		 * vfs_open_consume() we no longer own the reference in nd->path.mnt
+		 * while we need to undo write access below.
+		 */
+		mnt = mntget(nd->path.mnt);
 		do_truncate = true;
 	}
 	error = may_open(idmap, &nd->path, acc_mode, open_flag);
 	if (!error && !(file->f_mode & FMODE_OPENED))
-		error = vfs_open(&nd->path, file);
+		error = vfs_open_consume(&nd->path, file);
 	if (!error)
 		error = security_file_post_open(file, op->acc_mode);
 	if (!error && do_truncate)
@@ -3846,8 +3853,10 @@ static int do_open(struct nameidata *nd,
 		WARN_ON(1);
 		error = -EINVAL;
 	}
-	if (do_truncate)
-		mnt_drop_write(nd->path.mnt);
+	if (do_truncate) {
+		mnt_drop_write(mnt);
+		mntput(mnt);
+	}
 	return error;
 }
 
diff --git a/fs/open.c b/fs/open.c
index f2fcfaeb2232..fc1c6118eb30 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -891,6 +891,15 @@ static inline int file_get_write_access(struct file *f)
 	return error;
 }
 
+/*
+ * Populate struct file
+ *
+ * NOTE: it assumes f_path is populated and consumes the caller's reference.
+ *
+ * The caller must not path_put on it regardless of the error code -- the
+ * routine will either clean it up on its own or rely on fput, which must
+ * be issued anyway.
+ */
 static int do_dentry_open(struct file *f,
 			  int (*open)(struct inode *, struct file *))
 {
@@ -898,7 +907,6 @@ static int do_dentry_open(struct file *f,
 	struct inode *inode = f->f_path.dentry->d_inode;
 	int error;
 
-	path_get(&f->f_path);
 	f->f_inode = inode;
 	f->f_mapping = inode->i_mapping;
 	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
@@ -1042,6 +1050,7 @@ int finish_open(struct file *file, struct dentry *dentry,
 	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 
 	file->f_path.dentry = dentry;
+	path_get(&file->f_path);
 	return do_dentry_open(file, open);
 }
 EXPORT_SYMBOL(finish_open);
@@ -1074,15 +1083,22 @@ char *file_path(struct file *filp, char *buf, int buflen)
 EXPORT_SYMBOL(file_path);
 
 /**
- * vfs_open - open the file at the given path
+ * vfs_open_consume - open the file at the given path and consume the reference
  * @path: path to open
  * @file: newly allocated file with f_flag initialized
  */
-int vfs_open(const struct path *path, struct file *file)
+int vfs_open_consume(struct path *path, struct file *file)
 {
 	int ret;
 
 	file->f_path = *path;
+	path->mnt = NULL;
+	path->dentry = NULL;
+
+	/*
+	 * do_dentry_open() consumes the reference regardless of its
+	 * return value
+	 */
 	ret = do_dentry_open(file, NULL);
 	if (!ret) {
 		/*
@@ -1095,6 +1111,27 @@ int vfs_open(const struct path *path, struct file *file)
 	return ret;
 }
 
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @file: newly allocated file with f_flag initialized
+ *
+ * See commentary in vfs_open_consume. The difference here is that this routine
+ * grabs its own reference and does not clean up the passed path.
+ */
+int vfs_open(const struct path *path, struct file *file)
+{
+	int ret;
+
+	file->f_path = *path;
+	path_get(&file->f_path);
+	ret = do_dentry_open(file, NULL);
+	if (!ret) {
+		fsnotify_open(file);
+	}
+	return ret;
+}
+
 struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *cred)
 {
@@ -1197,6 +1234,7 @@ struct file *kernel_file_open(const struct path *path, int flags,
 		return f;
 
 	f->f_path = *path;
+	path_get(&f->f_path);
 	error = do_dentry_open(f, NULL);
 	if (error) {
 		fput(f);
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ