[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAH0uvogeG9rUw+1huquE=8fEq8AdJaSygS94G8a05=cHckjndg@mail.gmail.com>
Date: Tue, 4 Feb 2025 16:12:04 -0800
From: Howard Chu <howardchu95@...il.com>
To: Ian Rogers <irogers@...gle.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, Kan Liang <kan.liang@...ux.intel.com>,
John Garry <john.g.garry@...cle.com>, Will Deacon <will@...nel.org>,
James Clark <james.clark@...aro.org>, Mike Leach <mike.leach@...aro.org>,
Leo Yan <leo.yan@...ux.dev>, Guo Ren <guoren@...nel.org>,
Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>,
Albert Ou <aou@...s.berkeley.edu>, Charlie Jenkins <charlie@...osinc.com>,
Bibo Mao <maobibo@...ngson.cn>, Arnd Bergmann <arnd@...db.de>, Huacai Chen <chenhuacai@...nel.org>,
Catalin Marinas <catalin.marinas@....com>, Jiri Slaby <jirislaby@...nel.org>,
Björn Töpel <bjorn@...osinc.com>,
linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-csky@...r.kernel.org,
linux-riscv@...ts.infradead.org
Subject: Re: [PATCH v1 2/7] perf trace: Reorganize syscalls
Hello Ian,
Thanks for doing this.
On Fri, Jan 31, 2025 at 11:15 PM Ian Rogers <irogers@...gle.com> wrote:
>
> Identify struct syscall information in the syscalls table by a machine
> type and syscall number, not just system call number. Having the
> machine type means that 32-bit system calls can be differentiated from
> 64-bit ones on a machine capable of both. Having a table for all
> machine types and all system call numbers would be too large, so
> maintain a sorted array of system calls as they are encountered.
>
> Signed-off-by: Ian Rogers <irogers@...gle.com>
> ---
> tools/perf/builtin-trace.c | 178 +++++++++++++++++++++++++------------
> 1 file changed, 119 insertions(+), 59 deletions(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index ac97632f13dc..10d3ad0d21f7 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -66,6 +66,7 @@
> #include "rb_resort.h"
> #include "../perf.h"
> #include "trace_augment.h"
> +#include "dwarf-regs.h"
>
> #include <errno.h>
> #include <inttypes.h>
> @@ -86,6 +87,7 @@
>
> #include <linux/ctype.h>
> #include <perf/mmap.h>
> +#include <tools/libc_compat.h>
>
> #ifdef HAVE_LIBTRACEEVENT
> #include <event-parse.h>
> @@ -143,7 +145,10 @@ struct trace {
> struct perf_tool tool;
> struct syscalltbl *sctbl;
> struct {
> + /** Sorted sycall numbers used by the trace. */
> struct syscall *table;
> + /** Size of table. */
> + size_t table_size;
> struct {
> struct evsel *sys_enter,
> *sys_exit,
> @@ -1445,22 +1450,37 @@ static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
> return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
> }
>
> -/*
> - * is_exit: is this "exit" or "exit_group"?
> - * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
> - * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
> - * nonexistent: Just a hole in the syscall table, syscall id not allocated
> +/**
> + * struct syscall
> */
> struct syscall {
> + /** @e_machine: The ELF machine associated with the entry. */
> + int e_machine;
> + /** @id: id value from the tracepoint, the system call number. */
> + int id;
> struct tep_event *tp_format;
> int nr_args;
> + /**
> + * @args_size: sum of the sizes of the syscall arguments, anything
> + * after that is augmented stuff: pathname for openat, etc.
> + */
> +
> int args_size;
> struct {
> struct bpf_program *sys_enter,
> *sys_exit;
> } bpf_prog;
> + /** @is_exit: is this "exit" or "exit_group"? */
> bool is_exit;
> + /**
> + * @is_open: is this "open" or "openat"? To associate the fd returned in
> + * sys_exit with the pathname in sys_enter.
> + */
> bool is_open;
> + /**
> + * @nonexistent: Name lookup failed. Just a hole in the syscall table,
> + * syscall id not allocated.
> + */
> bool nonexistent;
> bool use_btf;
> struct tep_format_field *args;
> @@ -2066,22 +2086,21 @@ static int syscall__set_arg_fmts(struct syscall *sc)
> return 0;
> }
>
> -static int trace__read_syscall_info(struct trace *trace, int id)
> +static int syscall__read_info(struct syscall *sc, struct trace *trace)
> {
> char tp_name[128];
> - struct syscall *sc;
> - const char *name = syscalltbl__name(trace->sctbl, id);
> + const char *name;
> int err;
>
> - if (trace->syscalls.table == NULL) {
> - trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
> - if (trace->syscalls.table == NULL)
> - return -ENOMEM;
> - }
> - sc = trace->syscalls.table + id;
> if (sc->nonexistent)
> return -EEXIST;
>
> + if (sc->name) {
> + /* Info already read. */
> + return 0;
> + }
> +
> + name = syscalltbl__name(trace->sctbl, sc->id);
> if (name == NULL) {
> sc->nonexistent = true;
> return -EEXIST;
> @@ -2104,11 +2123,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
> */
> if (IS_ERR(sc->tp_format)) {
> sc->nonexistent = true;
> - return PTR_ERR(sc->tp_format);
> + err = PTR_ERR(sc->tp_format);
> + sc->tp_format = NULL;
> + return err;
> }
>
> - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
> - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
> + if (syscall__alloc_arg_fmts(sc, sc->tp_format->format.nr_fields))
Makes sense because IS_ERR(sc->tp_format) is checked a few lines
above. However, nr_fields can be 7 with the __syscall_nr prefix. I
sent a patch to fix this, but it's not included here... Never mind,
it's off-topic. This patch looks good.
Thanks,
Howard
> return -ENOMEM;
>
> sc->args = sc->tp_format->format.fields;
> @@ -2397,13 +2417,67 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
> return printed;
> }
>
> +static void syscall__init(struct syscall *sc, int e_machine, int id)
> +{
> + memset(sc, 0, sizeof(*sc));
> + sc->e_machine = e_machine;
> + sc->id = id;
> +}
> +
> +static void syscall__exit(struct syscall *sc)
> +{
> + if (!sc)
> + return;
> +
> + zfree(&sc->arg_fmt);
> +}
> +
> +static int syscall__cmp(const void *va, const void *vb)
> +{
> + const struct syscall *a = va, *b = vb;
> +
> + if (a->e_machine != b->e_machine)
> + return a->e_machine - b->e_machine;
> +
> + return a->id - b->id;
> +}
> +
> +static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, int id)
> +{
> + struct syscall key = {
> + .e_machine = e_machine,
> + .id = id,
> + };
> + struct syscall *sc, *tmp;
> +
> + sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> + sizeof(struct syscall), syscall__cmp);
> + if (sc)
> + return sc;
> +
> + tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
> + sizeof(struct syscall));
> + if (!tmp)
> + return NULL;
> +
> + trace->syscalls.table = tmp;
> + sc = &trace->syscalls.table[trace->syscalls.table_size++];
> + syscall__init(sc, e_machine, id);
> + qsort(trace->syscalls.table, trace->syscalls.table_size, sizeof(struct syscall),
> + syscall__cmp);
> + sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> + sizeof(struct syscall), syscall__cmp);
> + return sc;
> +}
> +
> typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
> union perf_event *event,
> struct perf_sample *sample);
>
> -static struct syscall *trace__syscall_info(struct trace *trace,
> - struct evsel *evsel, int id)
> +static struct syscall *trace__syscall_info(struct trace *trace, struct evsel *evsel,
> + int e_machine, int id)
> {
> + struct syscall *sc;
> int err = 0;
>
> if (id < 0) {
> @@ -2428,28 +2502,20 @@ static struct syscall *trace__syscall_info(struct trace *trace,
>
> err = -EINVAL;
>
> - if (id > trace->sctbl->syscalls.max_id) {
> - goto out_cant_read;
> - }
> -
> - if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
> - (err = trace__read_syscall_info(trace, id)) != 0)
> - goto out_cant_read;
> + sc = trace__find_syscall(trace, e_machine, id);
> + if (sc)
> + err = syscall__read_info(sc, trace);
>
> - if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
> - goto out_cant_read;
> -
> - return &trace->syscalls.table[id];
> -
> -out_cant_read:
> - if (verbose > 0) {
> + if (err && verbose > 0) {
> char sbuf[STRERR_BUFSIZE];
> - fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
> - if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
> - fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
> +
> + fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err,
> + str_error_r(-err, sbuf, sizeof(sbuf)));
> + if (sc && sc->name)
> + fprintf(trace->output, "(%s)", sc->name);
> fputs(" information\n", trace->output);
> }
> - return NULL;
> + return err ? NULL : sc;
> }
>
> struct syscall_stats {
> @@ -2596,14 +2662,6 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
> return NULL;
> }
>
> -static void syscall__exit(struct syscall *sc)
> -{
> - if (!sc)
> - return;
> -
> - zfree(&sc->arg_fmt);
> -}
> -
> static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
> union perf_event *event __maybe_unused,
> struct perf_sample *sample)
> @@ -2615,7 +2673,7 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
> int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
> int augmented_args_size = 0;
> void *augmented_args = NULL;
> - struct syscall *sc = trace__syscall_info(trace, evsel, id);
> + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
> struct thread_trace *ttrace;
>
> if (sc == NULL)
> @@ -2689,7 +2747,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
> struct thread_trace *ttrace;
> struct thread *thread;
> int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
> - struct syscall *sc = trace__syscall_info(trace, evsel, id);
> + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
> char msg[1024];
> void *args, *augmented_args = NULL;
> int augmented_args_size;
> @@ -2764,7 +2822,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
> struct thread *thread;
> int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
> int alignment = trace->args_alignment;
> - struct syscall *sc = trace__syscall_info(trace, evsel, id);
> + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
> struct thread_trace *ttrace;
>
> if (sc == NULL)
> @@ -3117,7 +3175,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
>
> if (evsel == trace->syscalls.events.bpf_output) {
> int id = perf_evsel__sc_tp_uint(evsel, id, sample);
> - struct syscall *sc = trace__syscall_info(trace, evsel, id);
> + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
>
> if (sc) {
> fprintf(trace->output, "%s(", sc->name);
> @@ -3622,7 +3680,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
>
> static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
> {
> - struct syscall *sc = trace__syscall_info(trace, NULL, id);
> + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
>
> if (sc == NULL)
> return;
> @@ -3633,20 +3691,20 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
>
> static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
> {
> - struct syscall *sc = trace__syscall_info(trace, NULL, id);
> + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
> return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> }
>
> static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
> {
> - struct syscall *sc = trace__syscall_info(trace, NULL, id);
> + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
> return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> }
>
> static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
> {
> struct tep_format_field *field;
> - struct syscall *sc = trace__syscall_info(trace, NULL, key);
> + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
> const struct btf_type *bt;
> char *struct_offset, *tmp, name[32];
> bool can_augment = false;
> @@ -3744,7 +3802,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> try_to_find_pair:
> for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
> int id = syscalltbl__id_at_idx(trace->sctbl, i);
> - struct syscall *pair = trace__syscall_info(trace, NULL, id);
> + struct syscall *pair = trace__syscall_info(trace, NULL, EM_HOST, id);
> struct bpf_program *pair_prog;
> bool is_candidate = false;
>
> @@ -3894,7 +3952,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> */
> for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
> int key = syscalltbl__id_at_idx(trace->sctbl, i);
> - struct syscall *sc = trace__syscall_info(trace, NULL, key);
> + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
> struct bpf_program *pair_prog;
> int prog_fd;
>
> @@ -4659,7 +4717,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
> pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
> avg /= NSEC_PER_MSEC;
>
> - sc = &trace->syscalls.table[syscall_stats_entry->syscall];
> + sc = trace__syscall_info(trace, /*evsel=*/NULL, EM_HOST,
> + syscall_stats_entry->syscall);
> + if (!sc)
> + continue;
> +
> printed += fprintf(fp, " %-15s", sc->name);
> printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
> n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
> @@ -5067,12 +5129,10 @@ static int trace__config(const char *var, const char *value, void *arg)
>
> static void trace__exit(struct trace *trace)
> {
> - int i;
> -
> strlist__delete(trace->ev_qualifier);
> zfree(&trace->ev_qualifier_ids.entries);
> if (trace->syscalls.table) {
> - for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
> + for (size_t i = 0; i < trace->syscalls.table_size; i++)
> syscall__exit(&trace->syscalls.table[i]);
> zfree(&trace->syscalls.table);
> }
> --
> 2.48.1.362.g079036d154-goog
>
Powered by blists - more mailing lists