lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZrKL2youCTmO3K0Q@tassilo>
Date: Tue, 6 Aug 2024 13:47:23 -0700
From: Andi Kleen <ak@...ux.intel.com>
To: Mateusz Guzik <mjguzik@...il.com>
Cc: Jeff Layton <jlayton@...nel.org>,
	Alexander Viro <viro@...iv.linux.org.uk>,
	Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Josef Bacik <josef@...icpanda.com>, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] fs: try an opportunistic lookup for O_CREAT opens too

> Before I get to the vfs layer, there is a significant loss in the
> memory allocator because of memcg -- it takes several irq off/on trips
> for every alloc (needed to grab struct file *). I have a plan what to
> do with it (handle stuff with local cmpxchg (note no lock prefix)),
> which I'm trying to get around to. Apart from that you may note the
> allocator fast path performs a 16-byte cmpxchg, which is again dog
> slow and executes twice (once for the file obj, another time for the
> namei buffer). Someone(tm) should patch it up and I have some vague
> ideas, but 0 idea when I can take a serious stab.

I just LBR-sampled it on my Skylake and it doesn't look
particularly slow. You can see that the whole massive block, including the
CMPXCHG16B, gets an IPC of 2.7, which is rather good. If you see lots of cycles
on it, it's likely due to a missed cache line.

    kmem_cache_free:
        ffffffff9944ce20                        nop %edi, %edx
        ffffffff9944ce24                        nopl  %eax, (%rax,%rax,1)
        ffffffff9944ce29                        pushq  %rbp
        ffffffff9944ce2a                        mov %rdi, %rdx
        ffffffff9944ce2d                        mov %rsp, %rbp
        ffffffff9944ce30                        pushq  %r15
        ffffffff9944ce32                        pushq  %r14
        ffffffff9944ce34                        pushq  %r13
        ffffffff9944ce36                        pushq  %r12
        ffffffff9944ce38                        mov $0x80000000, %r12d
        ffffffff9944ce3e                        pushq  %rbx
        ffffffff9944ce3f                        mov %rsi, %rbx
        ffffffff9944ce42                        and $0xfffffffffffffff0, %rsp
        ffffffff9944ce46                        sub $0x10, %rsp
        ffffffff9944ce4a                        movq  %gs:0x28, %rax
        ffffffff9944ce53                        movq  %rax, 0x8(%rsp)
        ffffffff9944ce58                        xor %eax, %eax
        ffffffff9944ce5a                        add %rsi, %r12
        ffffffff9944ce5d                        jb 0xffffffff9944d1ea
        ffffffff9944ce63                        mov $0xffffffff80000000, %rax
        ffffffff9944ce6a                        xor %r13d, %r13d
        ffffffff9944ce6d                        subq  0x17b068c(%rip), %rax
        ffffffff9944ce74                        add %r12, %rax
        ffffffff9944ce77                        shr $0xc, %rax
        ffffffff9944ce7b                        shl $0x6, %rax
        ffffffff9944ce7f                        addq  0x17b066a(%rip), %rax
        ffffffff9944ce86                        movq  0x8(%rax), %rcx
        ffffffff9944ce8a                        test $0x1, %cl
        ffffffff9944ce8d                        jnz 0xffffffff9944d15c
        ffffffff9944ce93                        nopl  %eax, (%rax,%rax,1)
        ffffffff9944ce98                        movq  (%rax), %rcx
        ffffffff9944ce9b                        and $0x8, %ch
        ffffffff9944ce9e                        jz 0xffffffff9944cfea
        ffffffff9944cea4                        test %rax, %rax
        ffffffff9944cea7                        jz 0xffffffff9944cfea
        ffffffff9944cead                        movq  0x8(%rax), %r14
        ffffffff9944ceb1                        test %r14, %r14
        ffffffff9944ceb4                        jz 0xffffffff9944cfac
        ffffffff9944ceba                        cmp %r14, %rdx
        ffffffff9944cebd                        jnz 0xffffffff9944d165
        ffffffff9944cec3                        test %r14, %r14
        ffffffff9944cec6                        jz 0xffffffff9944cfac
        ffffffff9944cecc                        movq  0x8(%rbp), %r15
        ffffffff9944ced0                        nopl  %eax, (%rax,%rax,1)
        ffffffff9944ced5                        movq  0x1fe5134(%rip), %rax
        ffffffff9944cedc                        test %r13, %r13
        ffffffff9944cedf                        jnz 0xffffffff9944ceef
        ffffffff9944cee1                        mov $0xffffffff80000000, %rax
        ffffffff9944cee8                        subq  0x17b0611(%rip), %rax
        ffffffff9944ceef                        add %rax, %r12
        ffffffff9944cef2                        shr $0xc, %r12
        ffffffff9944cef6                        shl $0x6, %r12
        ffffffff9944cefa                        addq  0x17b05ef(%rip), %r12
        ffffffff9944cf01                        movq  0x8(%r12), %rax
        ffffffff9944cf06                        mov %r12, %r13
        ffffffff9944cf09                        test $0x1, %al
        ffffffff9944cf0b                        jnz 0xffffffff9944d1b1
        ffffffff9944cf11                        nopl  %eax, (%rax,%rax,1)
        ffffffff9944cf16                        movq  (%r13), %rax
        ffffffff9944cf1a                        movq  %rbx, (%rsp)
        ffffffff9944cf1e                        test $0x8, %ah
        ffffffff9944cf21                        mov $0x0, %eax
        ffffffff9944cf26                        cmovz %rax, %r13
        ffffffff9944cf2a                        data16 nop
        ffffffff9944cf2c                        movq  0x38(%r13), %r8
        ffffffff9944cf30                        cmp $0x3, %r8
        ffffffff9944cf34                        jnbe 0xffffffff9944d1ca
        ffffffff9944cf3a                        nopl  %eax, (%rax,%rax,1)
        ffffffff9944cf3f                        movq  0x23d6f72(%rip), %rax
        ffffffff9944cf46                        mov %rbx, %rdx
        ffffffff9944cf49                        sub %rax, %rdx
        ffffffff9944cf4c                        cmp $0x1fffff, %rdx
        ffffffff9944cf53                        jbe 0xffffffff9944d03a
        ffffffff9944cf59                        movq  (%r14), %rax
        ffffffff9944cf5c                        addq  %gs:0x66bccab4(%rip), %rax
        ffffffff9944cf64                        movq  0x8(%rax), %rdx
        ffffffff9944cf68                        cmpq  %r13, 0x10(%rax)
        ffffffff9944cf6c                        jnz 0xffffffff9944d192
        ffffffff9944cf72                        movl  0x28(%r14), %ecx
        ffffffff9944cf76                        movq  (%rax), %rax
        ffffffff9944cf79                        add %rbx, %rcx
        ffffffff9944cf7c                        cmp %rbx, %rax
        ffffffff9944cf7f                        jz 0xffffffff9944d1ba
        ffffffff9944cf85                        movq  0xb8(%r14), %rsi
        ffffffff9944cf8c                        mov %rcx, %rdi
        ffffffff9944cf8f                        bswap %rdi
        ffffffff9944cf92                        xor %rax, %rsi
        ffffffff9944cf95                        xor %rdi, %rsi
        ffffffff9944cf98                        movq  %rsi, (%rcx)
        ffffffff9944cf9b                        leaq  0x2000(%rdx), %rcx
        ffffffff9944cfa2                        movq  (%r14), %rsi
        ffffffff9944cfa5                        cmpxchg16bx  %gs:(%rsi)
        ffffffff9944cfaa                        jnz 0xffffffff9944cf59
        ffffffff9944cfac                        movq  0x8(%rsp), %rax
        ffffffff9944cfb1                        subq  %gs:0x28, %rax
        ffffffff9944cfba                        jnz 0xffffffff9944d1fc
        ffffffff9944cfc0                        leaq  -0x28(%rbp), %rsp
        ffffffff9944cfc4                        popq  %rbx
        ffffffff9944cfc5                        popq  %r12
        ffffffff9944cfc7                        popq  %r13
        ffffffff9944cfc9                        popq  %r14
        ffffffff9944cfcb                        popq  %r15
        ffffffff9944cfcd                        popq  %rbp
        ffffffff9944cfce                        retq                            # PRED 38 cycles [126] 2.74 IPC    <-------------

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ