lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <49976c4d-b171-4c08-b05f-94b9a897151b@sifive.com>
Date: Mon, 25 Mar 2024 22:39:27 -0500
From: Samuel Holland <samuel.holland@...ive.com>
To: Alexandre Ghiti <alexghiti@...osinc.com>
Cc: Palmer Dabbelt <palmer@...belt.com>, linux-riscv@...ts.infradead.org,
 Andrew Jones <ajones@...tanamicro.com>, Albert Ou <aou@...s.berkeley.edu>,
 Anup Patel <apatel@...tanamicro.com>, Marc Zyngier <maz@...nel.org>,
 Paul Walmsley <paul.walmsley@...ive.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] riscv: Add tracepoints for SBI calls and returns

Hi Alex,

On 2024-03-22 4:27 AM, Alexandre Ghiti wrote:
> On Fri, Mar 22, 2024 at 12:01 AM Samuel Holland
> <samuel.holland@...ive.com> wrote:
>>
>> These are useful for measuring the latency of SBI calls. The SBI HSM
>> extension is excluded because those functions are called from contexts
>> such as cpuidle where instrumentation is not allowed.
>>
>> Reviewed-by: Andrew Jones <ajones@...tanamicro.com>
>> Signed-off-by: Samuel Holland <samuel.holland@...ive.com>
>> ---
>>
>> Changes in v2:
>>  - Remove some blank lines
>>
>>  arch/riscv/include/asm/trace.h | 54 ++++++++++++++++++++++++++++++++++
>>  arch/riscv/kernel/sbi.c        |  7 +++++
>>  2 files changed, 61 insertions(+)
>>  create mode 100644 arch/riscv/include/asm/trace.h
>>
>> diff --git a/arch/riscv/include/asm/trace.h b/arch/riscv/include/asm/trace.h
>> new file mode 100644
>> index 000000000000..6151cee5450c
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/trace.h
>> @@ -0,0 +1,54 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM riscv
>> +
>> +#if !defined(_TRACE_RISCV_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_RISCV_H
>> +
>> +#include <linux/tracepoint.h>
>> +
>> +TRACE_EVENT_CONDITION(sbi_call,
>> +       TP_PROTO(int ext, int fid),
>> +       TP_ARGS(ext, fid),
>> +       TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> +       TP_STRUCT__entry(
>> +               __field(int, ext)
>> +               __field(int, fid)
>> +       ),
>> +
>> +       TP_fast_assign(
>> +               __entry->ext = ext;
>> +               __entry->fid = fid;
>> +       ),
>> +
>> +       TP_printk("ext=0x%x fid=%d", __entry->ext, __entry->fid)
>> +);
>> +
>> +TRACE_EVENT_CONDITION(sbi_return,
>> +       TP_PROTO(int ext, long error, long value),
>> +       TP_ARGS(ext, error, value),
>> +       TP_CONDITION(ext != SBI_EXT_HSM),
>> +
>> +       TP_STRUCT__entry(
>> +               __field(long, error)
>> +               __field(long, value)
>> +       ),
>> +
>> +       TP_fast_assign(
>> +               __entry->error = error;
>> +               __entry->value = value;
>> +       ),
>> +
>> +       TP_printk("error=%ld value=0x%lx", __entry->error, __entry->value)
>> +);
>> +
>> +#endif /* _TRACE_RISCV_H */
>> +
>> +#undef TRACE_INCLUDE_PATH
>> +#undef TRACE_INCLUDE_FILE
>> +
>> +#define TRACE_INCLUDE_PATH asm
>> +#define TRACE_INCLUDE_FILE trace
>> +
>> +#include <trace/define_trace.h>
>> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
>> index e66e0999a800..a1d21d8f5293 100644
>> --- a/arch/riscv/kernel/sbi.c
>> +++ b/arch/riscv/kernel/sbi.c
>> @@ -14,6 +14,9 @@
>>  #include <asm/smp.h>
>>  #include <asm/tlbflush.h>
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <asm/trace.h>
>> +
>>  /* default SBI version is 0.1 */
>>  unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
>>  EXPORT_SYMBOL(sbi_spec_version);
>> @@ -31,6 +34,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>>  {
>>         struct sbiret ret;
>>
>> +       trace_sbi_call(ext, fid);
>> +
>>         register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
>>         register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
>>         register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
>> @@ -46,6 +51,8 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>>         ret.error = a0;
>>         ret.value = a1;
>>
>> +       trace_sbi_return(ext, ret.error, ret.value);
>> +
>>         return ret;
>>  }
>>  EXPORT_SYMBOL(sbi_ecall);
>> --
>> 2.43.1
>>
> 
> sbi_ecall() is used by flush_icache_all() so I was a bit scared we
> would need to patch 2 instructions using a tracepoint and then to use
> flush_icache_all() which could bug if only one of the patched
> instruction was seen. But I took a look at the disassembled code, and
> tracepoints happen to only need to patch 1 nop into an unconditional
> jump, which is not a problem then.

Right, tracepoints use the static branch infrastructure, which uses a single
instruction.

> But then when disassembling, I noticed that the addition of the
> tracepoint comes with "quite" some overhead:
> 
> Before:
> 
> Dump of assembler code for function sbi_ecall:
>    0xffffffff800085e0 <+0>: add sp,sp,-32
>    0xffffffff800085e2 <+2>: sd s0,24(sp)
>    0xffffffff800085e4 <+4>: mv t1,a0
>    0xffffffff800085e6 <+6>: add s0,sp,32
>    0xffffffff800085e8 <+8>: mv t3,a1
>    0xffffffff800085ea <+10>: mv a0,a2
>    0xffffffff800085ec <+12>: mv a1,a3
>    0xffffffff800085ee <+14>: mv a2,a4
>    0xffffffff800085f0 <+16>: mv a3,a5
>    0xffffffff800085f2 <+18>: mv a4,a6
>    0xffffffff800085f4 <+20>: mv a5,a7
>    0xffffffff800085f6 <+22>: mv a6,t3
>    0xffffffff800085f8 <+24>: mv a7,t1
>    0xffffffff800085fa <+26>: ecall
>    0xffffffff800085fe <+30>: ld s0,24(sp)
>    0xffffffff80008600 <+32>: add sp,sp,32
>    0xffffffff80008602 <+34>: ret
> 
> After:
> 
> Dump of assembler code for function sbi_ecall:
>    0xffffffff8000bbf2 <+0>: add sp,sp,-112
>    0xffffffff8000bbf4 <+2>: sd s0,96(sp)
>    0xffffffff8000bbf6 <+4>: sd s1,88(sp)
>    0xffffffff8000bbf8 <+6>: sd s3,72(sp)
>    0xffffffff8000bbfa <+8>: sd s4,64(sp)
>    0xffffffff8000bbfc <+10>: sd s5,56(sp)
>    0xffffffff8000bbfe <+12>: sd s6,48(sp)
>    0xffffffff8000bc00 <+14>: sd s7,40(sp)
>    0xffffffff8000bc02 <+16>: sd s8,32(sp)
>    0xffffffff8000bc04 <+18>: sd s9,24(sp)
>    0xffffffff8000bc06 <+20>: sd ra,104(sp)
>    0xffffffff8000bc08 <+22>: sd s2,80(sp)
>    0xffffffff8000bc0a <+24>: add s0,sp,112
>    0xffffffff8000bc0c <+26>: mv s1,a0
>    0xffffffff8000bc0e <+28>: mv s3,a1
>    0xffffffff8000bc10 <+30>: mv s9,a2
>    0xffffffff8000bc12 <+32>: mv s8,a3
>    0xffffffff8000bc14 <+34>: mv s7,a4
>    0xffffffff8000bc16 <+36>: mv s6,a5
>    0xffffffff8000bc18 <+38>: mv s5,a6
>    0xffffffff8000bc1a <+40>: mv s4,a7
>    0xffffffff8000bc1c <+42>: nop
>    0xffffffff8000bc20 <+46>: mv a0,s9
>    0xffffffff8000bc22 <+48>: mv a1,s8
>    0xffffffff8000bc24 <+50>: mv a2,s7
>    0xffffffff8000bc26 <+52>: mv a3,s6
>    0xffffffff8000bc28 <+54>: mv a4,s5
>    0xffffffff8000bc2a <+56>: mv a5,s4
>    0xffffffff8000bc2c <+58>: mv a6,s3
>    0xffffffff8000bc2e <+60>: mv a7,s1
>    0xffffffff8000bc30 <+62>: ecall
>    0xffffffff8000bc34 <+66>: mv s4,a0
>    0xffffffff8000bc36 <+68>: mv s3,a1
>    0xffffffff8000bc38 <+70>: nop
>    0xffffffff8000bc3c <+74>: ld ra,104(sp)
>    0xffffffff8000bc3e <+76>: ld s0,96(sp)
>    0xffffffff8000bc40 <+78>: ld s1,88(sp)
>    0xffffffff8000bc42 <+80>: ld s2,80(sp)
>    0xffffffff8000bc44 <+82>: ld s5,56(sp)
>    0xffffffff8000bc46 <+84>: ld s6,48(sp)
>    0xffffffff8000bc48 <+86>: ld s7,40(sp)
>    0xffffffff8000bc4a <+88>: ld s8,32(sp)
>    0xffffffff8000bc4c <+90>: ld s9,24(sp)
>    0xffffffff8000bc4e <+92>: mv a0,s4
>    0xffffffff8000bc50 <+94>: mv a1,s3
>    0xffffffff8000bc52 <+96>: ld s4,64(sp)
>    0xffffffff8000bc54 <+98>: ld s3,72(sp)
>    0xffffffff8000bc56 <+100>: add sp,sp,112
>    0xffffffff8000bc58 <+102>: ret
> 
> Not sure this is really problematic though, let me know what you think.

The overhead is only incurred when tracepoints are enabled. But when they are
enabled, I do not think it is avoidable, unless we were to hide this tracepoint
behind some Kconfig option. I don't think the overhead enough to be a problem,
but I suppose it's up to maintainer judgement.

Regards,
Samuel


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ