[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230925205545.4135472-1-mjguzik@gmail.com>
Date: Mon, 25 Sep 2023 22:55:45 +0200
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org
Cc: viro@...iv.linux.org.uk, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org, torvalds@...ux-foundation.org,
Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH] vfs: shave work on failed file open
Failed opens (mostly ENOENT) legitimately happen a lot; for example, here
are stats from stracing a kernel build for a few seconds (strace -fc make):
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ------------------
0.76 0.076233 5 15040 3688 openat
(this is tons of header files tried in different paths)
Apart from a rare corner case where the file object is fully constructed
and we need to abort, there is a lot of overhead which can be avoided.
Most notable is the delegation of freeing to task_work, which comes with an
enormous cost (see 021a160abf62 ("fs: use __fput_sync in close(2)") for
an example).
Benched with will-it-scale with a custom testcase based on
tests/open1.c:
[snip]
while (1) {
int fd = open("/tmp/nonexistent", O_RDONLY);
assert(fd == -1);
(*iterations)++;
}
[/snip]
Sapphire Rapids, one worker in the single-threaded case (ops/s):
before: 1950013
after: 2914973 (+49%)
Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---
fs/file_table.c | 39 +++++++++++++++++++++++++++++++++++++++
fs/namei.c | 2 +-
include/linux/file.h | 1 +
3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/fs/file_table.c b/fs/file_table.c
index ee21b3da9d08..320dc1f9aa0e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -82,6 +82,16 @@ static inline void file_free(struct file *f)
call_rcu(&f->f_rcuhead, file_free_rcu);
}
+static inline void file_free_badopen(struct file *f)
+{
+ BUG_ON(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
+ security_file_free(f);
+ put_cred(f->f_cred);
+ if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
+ percpu_counter_dec(&nr_files);
+ kmem_cache_free(filp_cachep, f);
+}
+
/*
* Return the total number of open files in the system
*/
@@ -468,6 +478,35 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
EXPORT_SYMBOL(__fput_sync);
+/*
+ * Clean up after failing to open (e.g., open(2) returns with -ENOENT).
+ *
+ * This is an opportunity to shave work in the common case compared
+ * to the usual fput:
+ * 1. vast majority of the time FMODE_OPENED is not set, meaning there is no
+ * need to delegate to task_work
+ * 2. if the above holds then we are guaranteed we have the only reference with
+ * nobody else seeing the file, thus no need to use atomics to release it
+ * 3. then there is no need to delegate freeing to RCU
+ */
+void fput_badopen(struct file *file)
+{
+ if (unlikely(file->f_mode & (FMODE_BACKING | FMODE_OPENED))) {
+ fput(file);
+ return;
+ }
+
+ if (WARN_ON(atomic_long_read(&file->f_count) != 1)) {
+ fput(file);
+ return;
+ }
+
+ /* zero out the ref count to appease possible asserts */
+ atomic_long_set(&file->f_count, 0);
+ file_free_badopen(file);
+}
+EXPORT_SYMBOL(fput_badopen);
+
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/namei.c b/fs/namei.c
index 567ee547492b..67579fe30b28 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3802,7 +3802,7 @@ static struct file *path_openat(struct nameidata *nd,
WARN_ON(1);
error = -EINVAL;
}
- fput(file);
+ fput_badopen(file);
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
diff --git a/include/linux/file.h b/include/linux/file.h
index 6e9099d29343..96300e27d9a8 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -15,6 +15,7 @@
struct file;
extern void fput(struct file *);
+extern void fput_badopen(struct file *);
struct file_operations;
struct task_struct;
--
2.39.2
Powered by blists - more mailing lists