[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <50054d23-0a89-41ec-b28b-b1ed77d93b00@redhat.com>
Date: Wed, 4 Feb 2026 23:45:17 -0500
From: Waiman Long <llong@...hat.com>
To: Waiman Long <llong@...hat.com>, Al Viro <viro@...iv.linux.org.uk>
Cc: Paul Moore <paul@...l-moore.com>, Eric Paris <eparis@...hat.com>,
Christian Brauner <brauner@...nel.org>, linux-kernel@...r.kernel.org,
audit@...r.kernel.org, Richard Guy Briggs <rgb@...hat.com>,
Ricardo Robaina <rrobaina@...hat.com>
Subject: Re: [PATCH v2] audit: Avoid excessive dput/dget in audit_context
setup and reset paths
On 2/4/26 10:03 PM, Waiman Long wrote:
> On 2/4/26 3:18 PM, Al Viro wrote:
>> On Wed, Feb 04, 2026 at 01:16:15PM -0500, Waiman Long wrote:
>>
>>
>>> Thanks for the detailed explanation. I am thinking about something like
>>> the code diff below. Of course, there are other corner cases like
>>> unshare(2)
>>> that still needs to be handled. Do you think something like this is
>>> viable?
>> Deadlocks aside, the immediate problem here is that consensus number
>> is too
>> low. Take three threads sharing the same fs_struct instance. The
>> first one
>> calls your get_fs_pwd_share(); then the remaining two threads call
>> set_fs_pwd()
>> (e.g. by calling chdir(2) in userland code). The reference stored into
>> fs->pwd_waiter by the first of those two gets overwritten by that stored
>> by the second. When the caller of get_fs_pwd_share() gets to
>> put_fs_pwd_share(),
>> only one of the sleepers gets woken up...
>>
>> And it's very easy to end up with something as simple as chdir("foo")
>> deadlocking -
>> we start with resolving the relative pathname we'd been given, audit
>> wants to
>> record the current directory, on the theory that relative pathname is
>> none too
>> useful in logs without knowing what had it been relative to. Then, in
>> the
>> same thread, you call set_fs_pwd() - after all, that's the main
>> effect of chdir(2).
>> Deadlock...
>>
>> IOW, it's not just unshare(2) that needs to be taken care of -
>> chdir(2) would need
>> to be treated differently.
>
> Now I realize that there is indeed a deadlock problem. Scrap that. Now
> I have a simpler idea that shouldn't have this type of deadlock
> problem. So what do you think about the sample code below?
Well, a complete change log is as follows.
Cheers,
Longman
=======================[ Cut here ]================================
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index b8c46c5a38a0..67e08d8db058 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -32,15 +32,19 @@ void set_fs_root(struct fs_struct *fs, const struct
path *p>
void set_fs_pwd(struct fs_struct *fs, const struct path *path)
{
struct path old_pwd;
+ int count;
path_get(path);
write_seqlock(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
+ count = fs->pwd_xrefs + 1;
+ fs->pwd_xrefs = 0;
write_sequnlock(&fs->seq);
if (old_pwd.dentry)
- path_put(&old_pwd);
+ while (count--)
+ path_put(&old_pwd);
}
static inline int replace_path(struct path *p, const struct path *old,
const s>
@@ -70,6 +74,8 @@ void chroot_fs_refs(const struct path *old_root, const
struct>
count++;
path_get(new_root);
}
+ count += fs->pwd_xrefs;
+ fs->pwd_xrefs = 0;
write_sequnlock(&fs->seq);
}
task_unlock(p);
@@ -81,8 +87,11 @@ void chroot_fs_refs(const struct path *old_root,
const struc>
void free_fs_struct(struct fs_struct *fs)
{
+ int count = fs->pwd_xrefs + 1;
+
path_put(&fs->root);
- path_put(&fs->pwd);
+ while (count--)
+ path_put(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -110,6 +119,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
if (fs) {
fs->users = 1;
fs->in_exec = 0;
+ fs->pwd_xrefs = 0;
seqlock_init(&fs->seq);
fs->umask = old->umask;
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0070764b790a..0d79d51de240 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -8,10 +8,11 @@
#include <linux/seqlock.h>
struct fs_struct {
- int users;
seqlock_t seq;
+ int users;
int umask;
int in_exec;
+ int pwd_xrefs; /* Extra references of pwd */
struct path root, pwd;
} __randomize_layout;
@@ -40,6 +41,31 @@ static inline void get_fs_pwd(struct fs_struct *fs,
struct p>
read_sequnlock_excl(&fs->seq);
}
+static inline void get_fs_pwd_share(struct fs_struct *fs, struct path *pwd)
+{
+ read_seqlock_excl(&fs->seq);
+ *pwd = fs->pwd;
+ if (fs->pwd_xrefs)
+ fs->pwd_xrefs--;
+ else
+ path_get(pwd);
+ read_sequnlock_excl(&fs->seq);
+}
+
+static inline void put_fs_pwd_share(struct fs_struct *fs, struct path *pwd)
+{
+ bool put = false;
+
+ read_seqlock_excl(&fs->seq);
+ if ((fs->pwd.dentry == pwd->dentry) && (fs->pwd.mnt == pwd->mnt))
+ fs->pwd_xrefs++;
+ else
+ put = true;
+ read_sequnlock_excl(&fs->seq);
+ if (put)
+ path_put(pwd);
+}
+
extern bool current_chrooted(void);
static inline int current_umask(void)
Powered by blists - more mailing lists