lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <132ceb8f-e82d-bed7-7b93-41f5dd505a03@huawei.com>
Date: Wed, 10 Dec 2025 10:56:56 +0800
From: Jinjie Ruan <ruanjinjie@...wei.com>
To: Kevin Brodsky <kevin.brodsky@....com>, <catalin.marinas@....com>,
	<will@...nel.org>, <oleg@...hat.com>, <tglx@...utronix.de>,
	<peterz@...radead.org>, <luto@...nel.org>, <shuah@...nel.org>,
	<kees@...nel.org>, <wad@...omium.org>, <deller@....de>, <macro@...am.me.uk>,
	<charlie@...osinc.com>, <ldv@...ace.io>, <mark.rutland@....com>,
	<song@...nel.org>, <ryan.roberts@....com>, <ada.coupriediaz@....com>,
	<anshuman.khandual@....com>, <broonie@...nel.org>, <pengcan@...inos.cn>,
	<dvyukov@...gle.com>, <linux-arm-kernel@...ts.infradead.org>,
	<linux-kernel@...r.kernel.org>, <linux-kselftest@...r.kernel.org>
Subject: Re: [PATCH v9 14/16] arm64: Inline el0_svc_common()



On 2025/12/9 21:48, Kevin Brodsky wrote:
> On 04/12/2025 09:21, Jinjie Ruan wrote:
>> After switching arm64 to Generic Entry, the compiler no longer inlines
> 
> Did it inline it before this series?

Yes, as shown in the disassembly of do_el0_svc() below:

<do_el0_svc>:
       d503201f        nop
       d503201f        nop
       d503233f        paciasp
       a9be7bfd        stp     x29, x30, [sp, #-32]!
       910003fd        mov     x29, sp
       a90153f3        stp     x19, x20, [sp, #16]
       aa0003f3        mov     x19, x0
       d5384114        mrs     x20, sp_el0
       f9400001        ldr     x1, [x0]
       f9400282        ldr     x2, [x20]
       f9008801        str     x1, [x0, #272]
       f9402001        ldr     x1, [x0, #64]
       b9011801        str     w1, [x0, #280]
       373001e2        tbnz    w2, #6, ffff80008002c0f0 <do_el0_svc+0x70>
       f278105f        tst     x2, #0x1f00
       54000261        b.ne    ffff80008002c108 <do_el0_svc+0x88>  // b.any
       52803ac2        mov     w2, #0x1d6                      // #470
       97ffffb1        bl      ffff80008002bf88 <invoke_syscall.constprop.0>
       f9400280        ldr     x0, [x20]
       92783400        and     x0, x0, #0x3fff00
       926bdc00        and     x0, x0, #0xffffffffffe01fff
       b4000060        cbz     x0, ffff80008002c0e0 <do_el0_svc+0x60>
       aa1303e0        mov     x0, x19
       97ffc835        bl      ffff80008001e1b0 <syscall_trace_exit>
       a94153f3        ldp     x19, x20, [sp, #16]
       a8c27bfd        ldp     x29, x30, [sp], #32
       d50323bf        autiasp
       d65f03c0        ret
       92804000        mov     x0, #0xfffffffffffffdff         // #-513
       f9000260        str     x0, [x19]
       a94153f3        ldp     x19, x20, [sp, #16]
       a8c27bfd        ldp     x29, x30, [sp], #32
       d50323bf        autiasp
       d65f03c0        ret
       3100043f        cmn     w1, #0x1
       54000140        b.eq    ffff80008002c134 <do_el0_svc+0xb4>  // b.none
       aa1303e0        mov     x0, x19
       97ffc7c1        bl      ffff80008001e018 <syscall_trace_enter>
       2a0003e1        mov     w1, w0
       3100041f        cmn     w0, #0x1
       54fffdc0        b.eq    ffff80008002c0d8 <do_el0_svc+0x58>  // b.none
       aa1303e0        mov     x0, x19
       52803ac2        mov     w2, #0x1d6                      // #470
       97ffff97        bl      ffff80008002bf88 <invoke_syscall.constprop.0>
       17ffffea        b       ffff80008002c0d8 <do_el0_svc+0x58>
       928004a0        mov     x0, #0xffffffffffffffda         // #-38
       f9000260        str     x0, [x19]
       17fffff5        b       ffff80008002c110 <do_el0_svc+0x90>
       d53cd044        mrs     x4, tpidr_el2
       d53cd040        mrs     x0, tpidr_el2
       d53cd041        mrs     x1, tpidr_el2
       00000000        udf     #0
       d503201f        nop
       d503201f        nop

> 
>> el0_svc_common() into do_el0_svc(). So explicitly inline el0_svc_common(),
>> which gives a 1% performance uplift on perf bench basic syscall on
>> kunpeng920, as shown below.
>>
>> | Metric     | W/O this patch | With this patch | Change    |
>> | ---------- | -------------- | --------------- | --------- |
>> | Total time | 2.195 [sec]    | 2.171 [sec]     |  ↓1.1%   |
>> | usecs/op   | 0.219575       | 0.217192        |  ↓1.1%   |
>> | ops/sec    | 4,554,260      | 4,604,225       |  ↑1.1%    |
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@...wei.com>
> 
> I think this is sensible - do_el0_svc() is clearly hot and the small
> increase in code size is completely justified. It also removes a
> performance regression when enabling CONFIG_COMPAT (without it
> el0_svc_common() has only one caller so it should be inlined regardless).
> 
> Reviewed-by: Kevin Brodsky <kevin.brodsky@....com>
> 
>> ---
>>  arch/arm64/kernel/syscall.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
>> index 47e193a1cfff..5aa51da9ec25 100644
>> --- a/arch/arm64/kernel/syscall.c
>> +++ b/arch/arm64/kernel/syscall.c
>> @@ -66,8 +66,8 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
>>  	choose_random_kstack_offset(get_random_u16());
>>  }
>>  
>> -static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>> -			   const syscall_fn_t syscall_table[])
>> +static __always_inline void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>> +					   const syscall_fn_t syscall_table[])
>>  {
>>  	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
>>  	unsigned long flags = read_thread_flags();
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ