[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260122062458.GA3330204@ZenIV>
Date: Thu, 22 Jan 2026 06:24:58 +0000
From: Al Viro <viro@...iv.linux.org.uk>
To: Max Kellermann <max.kellermann@...os.com>
Cc: linux-fsdevel@...r.kernel.org, Christian Brauner <brauner@...nel.org>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 20/21] __dentry_kill(): new locking scheme
On Wed, Jan 21, 2026 at 09:55:50PM +0000, Al Viro wrote:
> It's not so much an intention as having nothing good to wait on.
>
> Theoretically, there's a way to deal with that - dentry in the middle
> of stuck iput() from dentry_unlink_inode() from __dentry_kill() is
> guaranteed to be
> * negative
> * unhashed
> * not in-lookup
>
> What we could do is adding an hlist_head aliased with ->d_alias, ->d_rcu
> and ->d_in_lookup_hash. Then select_collect2() running into a dentry
> with negative refcount would set _that_ as victim and bugger off, same
> as we do for ones on shrink lists.
No need to bother with hlist - all we need is a single-linked list that
does not need to be generic. Variant below seems to survive the local
beating...
From 4d1f759a6338703f07a27a4d8e5814604968fb5b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@...iv.linux.org.uk>
Date: Wed, 21 Jan 2026 18:17:12 -0500
Subject: [PATCH] get rid of busy-waiting in shrink_dcache_parent()
There's a case in which shrink_dcache_parent() ends up busy-waiting:
if some dentry in the subtree in question is found to be in process
of being evicted by another thread. We need to wait for that to
finish so that parent would no longer be pinned; otherwise we'd end
up with a ridiculous situation - nothing in a subtree is busy, and a call
of shrink_dcache_parent() fails to evict some directory only because
memory pressure initiated eviction of some of its children before we
got to those.
The reason why we busy-wait is the lack of anything convenient we could
wait on. That can be fixed, though, and without growing struct dentry.
Dentries in question are
* already marked dead (negative ->d_count)
* already negative
* already unhashed
* already not in in-lookup hash
* yet to get removed from ->d_sib and get DCACHE_DENTRY_KILLED in
flags.
Neither ->d_alias nor the fields overlapping it (->d_rcu and ->d_in_lookup_hash)
are going to be accessed for these dentries until after dentry_unlist(). What's
more, ->d_alias.next is guaranteed to be NULL.
So we can embed struct completion into struct select_data and (ab)use
->d_alias.next for linked list of struct select_data instances.
If dentry_unlist() finds ->d_alias.next non-NULL, it carefully goes over
the list and calls complete() for each of those.
That way select_collect2() can treat negative ->d_count the same way it
deals with dentries on other thread's shrink list - grab rcu_read_lock(),
stash dentry into data.victim and tell d_walk() to stop.
Should shrink_dcache_parent() run into that case, it would attach its
select_data to victim dentry, drop whatever normal eviction candidates
it has gathered and wait for completion. Voila...
Signed-off-by: Al Viro <viro@...iv.linux.org.uk>
---
fs/dcache.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 71 insertions(+), 13 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index cf865c12cdf9..283c8ab767e1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -605,6 +605,54 @@ void d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(d_drop);
+struct select_data {
+ struct dentry *start;
+ union {
+ long found;
+ struct {
+ struct dentry *victim;
+ struct select_data *next;
+ struct completion completion;
+ };
+ };
+ struct list_head dispose;
+};
+
+/*
+ * shrink_dcache_parent() needs to be notified when dentry in process of
+ * being evicted finally gets unlisted. Such dentries are
+ * already with negative ->d_count
+ * already negative
+ * already not in in-lookup hash
+ * reachable only via ->d_sib.
+ *
+ * Neither ->d_alias, nor ->d_rcu, nor ->d_in_lookup_hash are going to be
+ * accessed for those, so we can (ab)use ->d_alias.next for list of
+ * select_data of waiters. Initially it's going to be NULL and as long
+ * as dentry_unlist() returns it to that state we are fine.
+ */
+static inline void d_add_waiter(struct dentry *dentry, struct select_data *p)
+{
+ struct select_data *v = (void *)dentry->d_u.d_alias.next;
+ init_completion(&p->completion);
+ p->next = v;
+ dentry->d_u.d_alias.next = (void *)p;
+}
+
+static inline void d_complete_waiters(struct dentry *dentry)
+{
+ struct select_data *v = (void *)dentry->d_u.d_alias.next;
+ if (unlikely(v)) {
+ /* some shrink_dcache_tree() instances are waiting */
+ dentry->d_u.d_alias.next = NULL;
+ while (v) {
+ struct completion *r = &v->completion;
+ v = v->next;
+ complete(r);
+ }
+ }
+}
+
static inline void dentry_unlist(struct dentry *dentry)
{
struct dentry *next;
@@ -613,6 +661,7 @@ static inline void dentry_unlist(struct dentry *dentry)
* attached to the dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ d_complete_waiters(dentry);
if (unlikely(hlist_unhashed(&dentry->d_sib)))
return;
__hlist_del(&dentry->d_sib);
@@ -1499,15 +1548,6 @@ int d_set_mounted(struct dentry *dentry)
* constraints.
*/
-struct select_data {
- struct dentry *start;
- union {
- long found;
- struct dentry *victim;
- };
- struct list_head dispose;
-};
-
static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
@@ -1559,6 +1599,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
return D_WALK_QUIT;
}
to_shrink_list(dentry, &data->dispose);
+ } else if (dentry->d_lockref.count < 0) {
+ rcu_read_lock();
+ data->victim = dentry;
+ return D_WALK_QUIT;
}
/*
* We can return to the caller if we have found some (this
@@ -1598,12 +1642,26 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
data.victim = NULL;
d_walk(parent, &data, select_collect2);
if (data.victim) {
- spin_lock(&data.victim->d_lock);
- if (!lock_for_kill(data.victim)) {
- spin_unlock(&data.victim->d_lock);
+ struct dentry *v = data.victim;
+
+ spin_lock(&v->d_lock);
+ if (v->d_lockref.count < 0 &&
+ !(v->d_flags & DCACHE_DENTRY_KILLED)) {
+ // It's busy dying; have it notify us once
+ // it becomes invisible to d_walk().
+ d_add_waiter(v, &data);
+ spin_unlock(&v->d_lock);
+ rcu_read_unlock();
+ if (!list_empty(&data.dispose))
+ shrink_dentry_list(&data.dispose);
+ wait_for_completion(&data.completion);
+ continue;
+ }
+ if (!lock_for_kill(v)) {
+ spin_unlock(&v->d_lock);
rcu_read_unlock();
} else {
- shrink_kill(data.victim);
+ shrink_kill(v);
}
}
if (!list_empty(&data.dispose))
--
2.47.3
Powered by blists - more mailing lists