[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260206201918.1988344-2-longman@redhat.com>
Date: Fri, 6 Feb 2026 15:19:16 -0500
From: Waiman Long <longman@...hat.com>
To: Paul Moore <paul@...l-moore.com>,
Eric Paris <eparis@...hat.com>,
Christian Brauner <brauner@...nel.org>,
Al Viro <viro@...iv.linux.org.uk>
Cc: linux-kernel@...r.kernel.org,
audit@...r.kernel.org,
Richard Guy Briggs <rgb@...hat.com>,
Ricardo Robaina <rrobaina@...hat.com>,
Waiman Long <longman@...hat.com>
Subject: [PATCH v3 1/2] fs: Add a pool of extra fs->pwd references to fs_struct
When the audit subsystem is enabled, it can do a lot of get_fs_pwd()
calls to get references to fs->pwd and then release those references
back with path_put() later. That may cause a lot of spinlock contention
on a single pwd's dentry lock because of the constant changes to the
reference count when there are many processes on the same working
directory actively doing open/close system calls. This can cause
a noticeable performance regression when compared with the case where
the audit subsystem is turned off, especially on systems with a lot of
CPUs, which is becoming more common these days.
A simple and elegant solution to avoid this kind of performance
regression is to add a common pool of extra fs->pwd references inside
the fs_struct. When a caller needs a pwd reference, it can borrow one
from the pool, if available, to avoid an explicit path_get(). When it is
time to release the reference, it can put it back into the common pool,
without doing a path_put(), if fs->pwd hasn't changed in the meantime. We still
need to acquire the fs's spinlock, but fs_struct is more distributed
and it is less common to have many tasks sharing a single fs_struct.
A new pair of APIs, get_fs_pwd_pool() and put_fs_pwd_pool(), is introduced
with this patch to enable other subsystems to acquire and release
a pwd reference from the common pool without doing unnecessary
path_get/path_put().
Besides fs/fs_struct.c, the copy_mnt_ns() function of fs/namespace.c is
also modified to properly handle the extra pwd references, if available.
Signed-off-by: Waiman Long <longman@...hat.com>
---
fs/fs_struct.c | 26 +++++++++++++++++++++-----
fs/namespace.c | 8 ++++++++
include/linux/fs_struct.h | 30 +++++++++++++++++++++++++++++-
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index b8c46c5a38a0..621fe1677913 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -32,15 +32,19 @@ void set_fs_root(struct fs_struct *fs, const struct path *path)
void set_fs_pwd(struct fs_struct *fs, const struct path *path)
{
struct path old_pwd;
+ int count;
path_get(path);
write_seqlock(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
+ count = fs->pwd_refs + 1;
+ fs->pwd_refs = 0;
write_sequnlock(&fs->seq);
if (old_pwd.dentry)
- path_put(&old_pwd);
+ while (count--)
+ path_put(&old_pwd);
}
static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
@@ -62,10 +66,15 @@ void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
task_lock(p);
fs = p->fs;
if (fs) {
- int hits = 0;
+ int hits;
+
write_seqlock(&fs->seq);
+ hits = replace_path(&fs->pwd, old_root, new_root);
+ if (hits && fs->pwd_refs) {
+ count += fs->pwd_refs;
+ fs->pwd_refs = 0;
+ }
hits += replace_path(&fs->root, old_root, new_root);
- hits += replace_path(&fs->pwd, old_root, new_root);
while (hits--) {
count++;
path_get(new_root);
@@ -81,8 +90,11 @@ void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
void free_fs_struct(struct fs_struct *fs)
{
+ int count = fs->pwd_refs + 1;
+
path_put(&fs->root);
- path_put(&fs->pwd);
+ while (count--)
+ path_put(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -110,6 +122,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
if (fs) {
fs->users = 1;
fs->in_exec = 0;
+ fs->pwd_refs = 0;
seqlock_init(&fs->seq);
fs->umask = old->umask;
@@ -117,7 +130,10 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
fs->root = old->root;
path_get(&fs->root);
fs->pwd = old->pwd;
- path_get(&fs->pwd);
+ if (old->pwd_refs)
+ old->pwd_refs--;
+ else
+ path_get(&fs->pwd);
read_sequnlock_excl(&old->seq);
}
return fs;
diff --git a/fs/namespace.c b/fs/namespace.c
index c58674a20cad..a2323ba84d76 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4135,6 +4135,14 @@ struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
* as belonging to new namespace. We have already acquired a private
* fs_struct, so tsk->fs->lock is not needed.
*/
+ if (new_fs)
+ WARN_ON_ONCE(new_fs->users != 1);
+
+ /* Release the extra pwd references of new_fs, if present. */
+ while (new_fs && new_fs->pwd_refs) {
+ path_put(&new_fs->pwd);
+ new_fs->pwd_refs--;
+ }
p = old;
q = new;
while (p) {
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0070764b790a..093648e65c20 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -8,10 +8,11 @@
#include <linux/seqlock.h>
struct fs_struct {
- int users;
seqlock_t seq;
+ int users;
int umask;
int in_exec;
+ int pwd_refs; /* A pool of extra pwd references */
struct path root, pwd;
} __randomize_layout;
@@ -40,6 +41,33 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
read_sequnlock_excl(&fs->seq);
}
+/* Acquire a pwd reference from the pwd_refs pool, if available */
+static inline void get_fs_pwd_pool(struct fs_struct *fs, struct path *pwd)
+{
+ read_seqlock_excl(&fs->seq);
+ *pwd = fs->pwd;
+ if (fs->pwd_refs)
+ fs->pwd_refs--;
+ else
+ path_get(pwd);
+ read_sequnlock_excl(&fs->seq);
+}
+
+/* Release a pwd reference back to the pwd_refs pool, if appropriate */
+static inline void put_fs_pwd_pool(struct fs_struct *fs, struct path *pwd)
+{
+ bool put = false;
+
+ read_seqlock_excl(&fs->seq);
+ if ((fs->pwd.dentry == pwd->dentry) && (fs->pwd.mnt == pwd->mnt))
+ fs->pwd_refs++;
+ else
+ put = true;
+ read_sequnlock_excl(&fs->seq);
+ if (put)
+ path_put(pwd);
+}
+
extern bool current_chrooted(void);
static inline int current_umask(void)
--
2.52.0
Powered by blists - more mailing lists