Message-ID: <20251119184001.2942865-1-mjguzik@gmail.com>
Date: Wed, 19 Nov 2025 19:40:01 +0100
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org
Cc: viro@...iv.linux.org.uk,
jack@...e.cz,
linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
Mateusz Guzik <mjguzik@...il.com>
Subject: [PATCH] fs: inline step_into() and walk_component()

The primary consumer is link_path_walk(), which calls walk_component()
for every path component, which in turn calls step_into().

Inlining these saves the overhead of 2 function calls per path
component and allows the compiler to do a better job optimizing them in
place.

step_into() had absolutely atrocious assembly to facilitate the
slowpath. In order to lessen the burden at the callsite, all the hard
work is moved into step_into_slowpath(). This also elides some of the
branches; for example, LOOKUP_RCU is only checked once.

The inlinable step_into() variant deals with the common case of
traversing a cached, non-mounted-on directory while in RCU mode.

Since symlink handling is already marked as unlikely(), I took the
opportunity to also shorten step_into_slowpath() by moving parts of it
into pick_link(), which shortens the assembly further.
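
For illustration only, here is the general shape of that split as a
standalone userspace snippet with made-up names (foo, foo_slowpath);
the actual namei changes are in the diff further down:

/* Illustration of the fast path / slow path split; not the namei code. */
#define noinline	__attribute__((__noinline__))
#define always_inline	inline __attribute__((__always_inline__))
#define unlikely(x)	__builtin_expect(!!(x), 0)

struct foo { int flags; int val; };

/* Everything rare and heavyweight lives out of line, so callers pay no
 * prologue/stack protector cost for it. */
static noinline int foo_slowpath(struct foo *f)
{
	return f->val * 2;	/* stands in for mounts/symlinks/non-RCU handling */
}

/* Common case: a few cheap checks, fully inlined into the caller. */
static always_inline int foo(struct foo *f)
{
	if (unlikely(f->flags))
		return foo_slowpath(f);
	return f->val;
}

int main(void)
{
	struct foo f = { 0, 42 };
	return foo(&f);
}
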
Benchmarked as follows on Sapphire Rapids:

1. the "before" was a kernel with not-yet-merged optimizations (notably
   elision of calls to security_inode_permission() and marking ext4
   inodes as not having ACLs)
2. "after" is the same + this patch
3. the benchmark consists of issuing 205 calls to access(2) in a loop,
   with pathnames lifted out of gcc and the linker building real code;
   most of them have several path components and 118 of them fail with
   -ENOENT. Some of them involve symlink traversal. (A sketch of the
   loop follows the list.)
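
A minimal sketch of that loop, assuming a pathnames[] array holding the
205 paths lifted from the gcc/ld traces (the placeholders below are not
the real list):

/* Minimal sketch of the benchmark loop, not the actual harness. */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static const char *pathnames[] = {
	"/usr/include/stdio.h",			/* placeholder entries; the real */
	"/usr/include/does-not-exist.h",	/* list has 205 paths, 118 -ENOENT */
};

int main(void)
{
	const long n = sizeof(pathnames) / sizeof(pathnames[0]);
	struct timespec t0, t1;
	double elapsed;
	long calls = 0;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	do {
		for (long i = 0; i < n; i++)
			access(pathnames[i], F_OK);
		calls += n;
		clock_gettime(CLOCK_MONOTONIC, &t1);
		elapsed = (t1.tv_sec - t0.tv_sec) +
			  (t1.tv_nsec - t0.tv_nsec) / 1e9;
	} while (elapsed < 5.0);

	printf("%.0f ops/s\n", calls / elapsed);
	return 0;
}
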
In terms of ops/s:
before: 21619
after: 22536 (+4%)
profile before:
20.25% [kernel] [k] __d_lookup_rcu
10.54% [kernel] [k] link_path_walk
10.22% [kernel] [k] entry_SYSCALL_64
6.50% libc.so.6 [.] __GI___access
6.35% [kernel] [k] strncpy_from_user
4.87% [kernel] [k] step_into
3.68% [kernel] [k] kmem_cache_alloc_noprof
2.88% [kernel] [k] walk_component
2.86% [kernel] [k] kmem_cache_free
2.14% [kernel] [k] set_root
2.08% [kernel] [k] lookup_fast
after:
23.38% [kernel] [k] __d_lookup_rcu
11.27% [kernel] [k] entry_SYSCALL_64
10.89% [kernel] [k] link_path_walk
7.00% libc.so.6 [.] __GI___access
6.88% [kernel] [k] strncpy_from_user
3.50% [kernel] [k] kmem_cache_alloc_noprof
2.01% [kernel] [k] kmem_cache_free
2.00% [kernel] [k] set_root
1.99% [kernel] [k] lookup_fast
1.81% [kernel] [k] do_syscall_64
1.69% [kernel] [k] entry_SYSCALL_64_safe_stack

While walk_component() and step_into() of course disappear from the
profile, link_path_walk() barely gains any overhead despite the
inlining, thanks to the added fast path, all while completing more
walks per second.

I don't know why the overhead of __d_lookup_rcu() grew so much.

Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
---
Perhaps this could be 2-3 patches instead, to do things incrementally.

The other patches not listed in the commit message are:
1. mntput_not_expire slowpath (Christian took it, not present in fs-next yet)
2. nd->depth predicts
3. lookup_slow noinline

All of those have been sent.

You may notice step_into() is quite high in the profile:

4.87% [kernel] [k] step_into

Here is the routine prior to the patch:

call ffffffff81374630 <__fentry__>
push %rbp
mov %rsp,%rbp
push %r15
push %r14
mov %esi,%r14d
push %r13
push %r12
push %rbx
mov %rdi,%rbx
sub $0x28,%rsp
mov (%rdi),%rax
mov 0x38(%rdi),%r8d
mov %gs:0x2e10acd(%rip),%r12 # ffffffff84551008 <__stack_chk_guard>
mov %r12,0x20(%rsp)
mov %rdx,%r12
mov %rdx,0x18(%rsp)
mov %rax,0x10(%rsp)

This is all setup before the routine even gets to do anything, which of
course has to be undone later. Also note the stack guard check. I'm not
pasting everything; you can disassemble it yourself to see the entire
monstrosity.

In contrast, this is the entirety of the step_into() fast path with the
patch applied + noinline:

call ffffffff81374630 <__fentry__>
testb $0x1,0x39(%rdi)
je ffffffff81740cbe <step_into+0x4e>
mov (%rdx),%eax
test $0x38000,%eax
jne ffffffff81740cbe <step_into+0x4e>
and $0x380000,%eax
mov 0x30(%rdx),%rcx
cmp $0x300000,%eax
je ffffffff81740cbe <step_into+0x4e>
mov (%rdi),%r8
mov 0x44(%rdi),%esi
mov 0x4(%rdx),%eax
cmp %eax,%esi
jne ffffffff81740cc3 <step_into+0x53>
test %rcx,%rcx
je ffffffff81740ccf <step_into+0x5f>
mov 0x44(%rdi),%eax
mov %r8,(%rdi)
mov %rdx,0x8(%rdi)
mov %eax,0x40(%rdi)
xor %eax,%eax
mov %rcx,0x30(%rdi)
jmp ffffffff8230b1f0 <__pi___x86_return_thunk>

This can possibly be shortened further, but I have not tried yet.

fs/namei.c | 67 ++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 50 insertions(+), 17 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 1d1f864ad6ad..e00b9ce21536 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1668,17 +1668,17 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
bool jumped;
int ret;
- path->mnt = nd->path.mnt;
- path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
unsigned int seq = nd->next_seq;
+ if (likely(!(dentry->d_flags & DCACHE_MANAGED_DENTRY)))
+ return 0;
if (likely(__follow_mount_rcu(nd, path)))
return 0;
// *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
nd->next_seq = seq;
- if (!try_to_unlazy_next(nd, dentry))
+ if (unlikely(!try_to_unlazy_next(nd, dentry)))
return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
@@ -1941,12 +1941,23 @@ static int reserve_stack(struct nameidata *nd, struct path *link)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
-static const char *pick_link(struct nameidata *nd, struct path *link,
+static noinline const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, int flags)
{
struct saved *last;
const char *res;
- int error = reserve_stack(nd, link);
+ int error;
+
+ if (nd->flags & LOOKUP_RCU) {
+ /* make sure that d_is_symlink above matches inode */
+ if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq))
+ return ERR_PTR(-ECHILD);
+ } else {
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+ }
+
+ error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
@@ -2021,14 +2032,17 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
*
* NOTE: dentry must be what nd->next_seq had been sampled from.
*/
-static const char *step_into(struct nameidata *nd, int flags,
+static noinline const char *step_into_slowpath(struct nameidata *nd, int flags,
struct dentry *dentry)
{
struct path path;
struct inode *inode;
- int err = handle_mounts(nd, dentry, &path);
+ int err;
- if (err < 0)
+ path.mnt = nd->path.mnt;
+ path.dentry = dentry;
+ err = handle_mounts(nd, dentry, &path);
+ if (unlikely(err < 0))
return ERR_PTR(err);
inode = path.dentry->d_inode;
if (likely(!d_is_symlink(path.dentry)) ||
@@ -2050,17 +2064,36 @@ static const char *step_into(struct nameidata *nd, int flags,
nd->seq = nd->next_seq;
return NULL;
}
- if (nd->flags & LOOKUP_RCU) {
- /* make sure that d_is_symlink above matches inode */
- if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
- return ERR_PTR(-ECHILD);
- } else {
- if (path.mnt == nd->path.mnt)
- mntget(path.mnt);
- }
return pick_link(nd, &path, inode, flags);
}
+static __always_inline const char *step_into(struct nameidata *nd, int flags,
+ struct dentry *dentry)
+{
+ struct path path;
+ struct inode *inode;
+
+ path.mnt = nd->path.mnt;
+ path.dentry = dentry;
+ if (!(nd->flags & LOOKUP_RCU))
+ goto slowpath;
+ if (unlikely(dentry->d_flags & DCACHE_MANAGED_DENTRY))
+ goto slowpath;
+ inode = path.dentry->d_inode;
+ if (unlikely(d_is_symlink(path.dentry)))
+ goto slowpath;
+ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
+ return ERR_PTR(-ECHILD);
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOENT);
+ nd->path = path;
+ nd->inode = inode;
+ nd->seq = nd->next_seq;
+ return NULL;
+slowpath:
+ return step_into_slowpath(nd, flags, dentry);
+}
+
static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
{
struct dentry *parent, *old;
@@ -2171,7 +2204,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
return NULL;
}
-static const char *walk_component(struct nameidata *nd, int flags)
+static __always_inline const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
/*
--
2.48.1