[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aJ8rsK2-XcPXNX7h@krava>
Date: Fri, 15 Aug 2025 14:44:32 +0200
From: Jiri Olsa <olsajiri@...il.com>
To: Jiawei Zhao <phoenix500526@....com>
Cc: ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
yonghong.song@...ux.dev, bpf@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH bpf-next v10 3/3] selftests/bpf: make usdt_o2 reliably
generate SIB USDT arg spec
On Thu, Aug 14, 2025 at 04:07:39PM +0000, Jiawei Zhao wrote:
> usdt_o2 is intended to exercise the SIB (Scale-Index-Base) argument
> handling in libbpf's USDT path. With GCC 13 this reliably produced a
> SIB-form argument (e.g. 8@(%rdx,%rax,8)), but with newer GCC (e.g. 15)
> the compiler frequently optimizes the probe argument into a plain
> register (e.g. 8@%rax) or a stack slot, so the test stops covering the
> SIB code path and becomes flaky across toolchains.
>
> Force a SIB memory operand in the probe by:
> * placing the base pointer into %rdx and the index into %rax using an
> empty inline asm with output constraints ("=d", "=a") and matching
> inputs
> * immediately passing base[idx] to STAP_PROBE1.
> * only enable on x86 platform.
>
> This makes the compiler encode the operand as SIB (base + index*8),
> which in .note.stapsdt shows up as 8@(%rdx,%rax,8) regardless of GCC
> version. A memory clobber and noinline prevent reordering/re-allocation
> around the probe site.
>
> This change is x86_64-specific and does not alter program semantics; it
> only stabilizes the USDT argument shape so the test consistently
> validates SIB handling. Clang historically prefers stack temporaries for
> such operands, but the selftests build with GCC, and this keeps behavior
> stable across GCC versions without introducing a separate .S file.
>
> Signed-off-by: Jiawei Zhao <phoenix500526@....com>
> ---
> .../selftests/bpf/prog_tests/usdt_o2.c | 20 ++++++++++++++-----
> 1 file changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
> index f02dcf5188ab..e46d5743ad24 100644
> --- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
> +++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
> @@ -15,11 +15,19 @@ __attribute__((optimize("O2")))
> int lets_test_this(int);
> static volatile __u64 array[1] = {test_value};
>
> -static __always_inline void trigger_func(void)
> +static noinline void trigger_func(void)
> {
> +#if defined(__x86_64__) || defined(__i386__)
> /* Base address + offset + (index * scale) */
> - for (volatile int i = 0; i <= 0; i++)
> - STAP_PROBE1(test, usdt1, array[i]);
> + /* Force SIB addressing with inline assembly */
> + const __u64 *base;
> + __u32 idx;
> + /* binding base to %rdx and idx to %rax */
> + asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory");
> + STAP_PROBE1(test, usdt1, base[idx]);
hum, I still end up with
stapsdt 0x0000002a NT_STAPSDT (SystemTap probe descriptors)
Provider: test
Name: usdt1
Location: 0x00000000007674c9, Base: 0x00000000035bc698, Semaphore: 0x0000000000000000
Arguments: 8@%rax
disasm being:
static noinline void trigger_func(void)
{
76749f: 55 push %rbp
7674a0: 48 89 e5 mov %rsp,%rbp
/* Base address + offset + (index * scale) */
/* Force SIB addressing with inline assembly */
const __u64 *base;
__u32 idx;
/* binding base to %rdx and idx to %rax */
asm volatile("" : "=d"(base), "=a"(idx) : "0"(array), "1"((__u32)0) : "memory");
7674a3: ba 20 49 9c 03 mov $0x39c4920,%edx
7674a8: b8 00 00 00 00 mov $0x0,%eax
7674ad: 48 89 55 f8 mov %rdx,-0x8(%rbp)
7674b1: 89 45 f4 mov %eax,-0xc(%rbp)
STAP_PROBE1(test, usdt1, base[idx]);
7674b4: 8b 45 f4 mov -0xc(%rbp),%eax
7674b7: 48 8d 14 c5 00 00 00 lea 0x0(,%rax,8),%rdx
7674be: 00
7674bf: 48 8b 45 f8 mov -0x8(%rbp),%rax
7674c3: 48 01 d0 add %rdx,%rax
7674c6: 48 8b 00 mov (%rax),%rax
7674c9: 90 nop
#else
STAP_PROBE1(test, usdt1, array[0]);
#endif
}
7674ca: 90 nop
7674cb: 5d pop %rbp
7674cc: c3 ret
I wonder if we could also try to bring in Andrii's usdt.h [1] and overload usdt
arguments as outlined in the hack below (full code in [2]).
We will probably need a smarter and more sustainable change, but I guess you get
the idea.
jirka
[1] https://github.com/anakryiko/usdt
[2] git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git usdt_hack
---
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
index e46d5743ad24..7bb098c37de5 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt_o2.c
@@ -4,6 +4,8 @@
#include "../sdt.h"
#include "test_usdt_o2.skel.h"
+#define USDT_ARGS ".asciz \"(,%%rax,8)\"\n"
+#include "usdt.h"
#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("O2")))
@@ -28,6 +30,7 @@ static noinline void trigger_func(void)
#else
STAP_PROBE1(test, usdt1, array[0]);
#endif
+ USDT(krava, test1, 1, 2);
}
static void basic_sib_usdt(void)
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
index 549d1f774810..960ebd6aa88b 100644
--- a/tools/testing/selftests/bpf/usdt.h
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -403,6 +403,10 @@ struct usdt_sema { volatile unsigned short active; };
__asm__ __volatile__ ("" :: "m" (sema));
#endif
+#ifndef USDT_ARGS
+#define USDT_ARGS __usdt_asm_args(__VA_ARGS__)
+#endif
+
/* main USDT definition (nop and .note.stapsdt metadata) */
#define __usdt_probe(group, name, sema_def, sema, ...) do { \
sema_def(sema) \
@@ -418,7 +422,7 @@ struct usdt_sema { volatile unsigned short active; };
__usdt_asm1( __usdt_asm_addr sema) \
__usdt_asm_strz(group) \
__usdt_asm_strz(name) \
- __usdt_asm_args(__VA_ARGS__) \
+ USDT_ARGS \
__usdt_asm1( .ascii "\0") \
__usdt_asm1(994: .balign 4) \
__usdt_asm1( .popsection) \
Powered by blists - more mailing lists