[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAP-5=fUdfCyxmEiTpzS2uumUp3-SyQOseX2xZo81-dQtWXj6vA@mail.gmail.com>
Date: Thu, 23 Jan 2025 14:31:14 -0800
From: Ian Rogers <irogers@...gle.com>
To: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Namhyung Kim <namhyung@...nel.org>, Ingo Molnar <mingo@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, Kan Liang <kan.liang@...ux.intel.com>,
Clark Williams <williams@...hat.com>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, Arnaldo Carvalho de Melo <acme@...hat.com>,
Athira Rajeev <atrajeev@...ux.vnet.ibm.com>, "Steinar H. Gunderson" <sesse@...gle.com>
Subject: Re: [PATCH 3/3] perf disasm: Allow configuring what disassemblers to use
On Mon, Nov 11, 2024 at 7:18 AM Arnaldo Carvalho de Melo
<acme@...nel.org> wrote:
>
> From: Arnaldo Carvalho de Melo <acme@...hat.com>
>
> The perf tools annotation code used for a long time parsing the output
> of binutils's objdump (or its reimplementations, like llvm's) to then
> parse and augment it with samples, allow navigation, etc.
>
> More recently disassemblers from the capstone and llvm (libraries, not
> parsing the output of tools using those libraries to mimic binutils's
> objdump output) were introduced.
>
> So when all those methods are available, there is a static preference
> for a series of attempts of disassembling a binary, with the 'llvm,
> capstone, objdump' sequence being hard coded.
>
> This patch allows users to change that sequence, specifying via a 'perf
> config' 'annotate.disassemblers' entry which and in what order
> disassemblers should be attempted.
>
> As alluded to in the comments in the source code of this series, this
> flexibility is useful for users and developers alike, elliminating the
> requirement to rebuild the tool with some specific set of libraries to
> see how the output of disassembling would be for one of these methods.
>
> root@x1:~# rm -f ~/.perfconfig
> root@x1:~# perf annotate -v --stdio2 update_load_avg
> <SNIP>
> symbol__disassemble:
> filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
> sym=update_load_avg, start=0xffffffffb6148fe0, en>
> annotating [0x6ff7170]
> /usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
> [0x7407ca0] update_load_avg
> Disassembled with llvm
> annotate.disassemblers=llvm,capstone,objdump
> Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
> Event count (approx.): 5185444, [percent: local period]
> update_load_avg()
> /usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
> Percent 0xffffffff81148fe0 <update_load_avg>:
> 1.61 pushq %r15
> pushq %r14
> 1.00 pushq %r13
> movl %edx,%r13d
> 1.90 pushq %r12
> pushq %rbp
> movq %rsi,%rbp
> pushq %rbx
> movq %rdi,%rbx
> subq $0x18,%rsp
> 15.14 movl 0x1a4(%rdi),%eax
>
> root@x1:~# perf config annotate.disassemblers=capstone
> root@x1:~# cat ~/.perfconfig
> # this file is auto-generated.
> [annotate]
> disassemblers = capstone
> root@x1:~#
> root@x1:~# perf annotate -v --stdio2 update_load_avg
> <SNIP>
> Disassembled with capstone
> annotate.disassemblers=capstone
> Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
> Event count (approx.): 5185444, [percent: local period]
> update_load_avg()
> /usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
> Percent 0xffffffff81148fe0 <update_load_avg>:
> 1.61 pushq %r15
> pushq %r14
> 1.00 pushq %r13
> movl %edx,%r13d
> 1.90 pushq %r12
> pushq %rbp
> movq %rsi,%rbp
> pushq %rbx
> movq %rdi,%rbx
> subq $0x18,%rsp
> 15.14 movl 0x1a4(%rdi),%eax
> root@x1:~# perf config annotate.disassemblers=objdump,capstone
> root@x1:~# perf config annotate.disassemblers
> annotate.disassemblers=objdump,capstone
> root@x1:~# cat ~/.perfconfig
> # this file is auto-generated.
> [annotate]
> disassemblers = objdump,capstone
> root@x1:~# perf annotate -v --stdio2 update_load_avg
> Executing: objdump --start-address=0xffffffff81148fe0 \
> --stop-address=0xffffffff811497aa \
> -d --no-show-raw-insn -S -C "$1"
> Disassembled with objdump
> annotate.disassemblers=objdump,capstone
> Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
> Event count (approx.): 5185444, [percent: local period]
> update_load_avg()
> /usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
> Percent
>
> Disassembly of section .text:
>
> ffffffff81148fe0 <update_load_avg>:
> #define DO_ATTACH 0x4
>
> ffffffff81148fe0 <update_load_avg>:
> #define DO_ATTACH 0x4
> #define DO_DETACH 0x8
>
> /* Update task and its cfs_rq load average */
> static inline void update_load_avg(struct cfs_rq *cfs_rq,
> struct sched_entity *se,
> int flags)
> {
> 1.61 push %r15
> push %r14
> 1.00 push %r13
> mov %edx,%r13d
> 1.90 push %r12
> push %rbp
> mov %rsi,%rbp
> push %rbx
> mov %rdi,%rbx
> sub $0x18,%rsp
> }
>
> /* rq->task_clock normalized against any time
> this cfs_rq has spent throttled */
> static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
> {
> if (unlikely(cfs_rq->throttle_count))
> 15.14 mov 0x1a4(%rdi),%eax
> root@x1:~#
>
> After adding a way to select the disassembler from the command line a
> 'perf test' comparing the output of the various diassemblers should be
> introduced, to test these codebases.
>
> Cc: Adrian Hunter <adrian.hunter@...el.com>
> Cc: Athira Rajeev <atrajeev@...ux.vnet.ibm.com>
> Cc: Ian Rogers <irogers@...gle.com>
> Cc: Jiri Olsa <jolsa@...nel.org>
> Cc: Kan Liang <kan.liang@...ux.intel.com>
> Cc: Namhyung Kim <namhyung@...nel.org>
> Cc: Steinar H. Gunderson <sesse@...gle.com>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
> ---
> tools/perf/Documentation/perf-config.txt | 13 ++++
> tools/perf/util/annotate.c | 6 ++
> tools/perf/util/annotate.h | 6 ++
> tools/perf/util/disasm.c | 77 ++++++++++++++++++++++--
> 4 files changed, 96 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
> index 379f9d7a8ab11a02..1f668d4724e3749a 100644
> --- a/tools/perf/Documentation/perf-config.txt
> +++ b/tools/perf/Documentation/perf-config.txt
> @@ -247,6 +247,19 @@ annotate.*::
> These are in control of addresses, jump function, source code
> in lines of assembly code from a specific program.
>
> + annotate.disassemblers::
> + Choose the disassembler to use: "objdump", "llvm", "capstone",
> + if not specified it will first try, if available, the "llvm" one,
> + then, if it fails, "capstone", and finally the original "objdump"
> + based one.
> +
> + Choosing a different one is useful when handling some feature that
> + is known to be best support at some point by one of the options,
> + to compare the output when in doubt about some bug, etc.
> +
> + This can be a list, in order of preference, the first one that works
> + finishes the process.
> +
> annotate.addr2line::
> addr2line binary to use for file names and line numbers.
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index b1d98da79be8b2b0..32e15c9f53f3c0a3 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -2116,6 +2116,12 @@ static int annotation__config(const char *var, const char *value, void *data)
> opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
> else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
> opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
> + } else if (!strcmp(var, "annotate.disassemblers")) {
> + opt->disassemblers_str = strdup(value);
> + if (!opt->disassemblers_str) {
> + pr_err("Not enough memory for annotate.disassemblers\n");
> + return -1;
> + }
> } else if (!strcmp(var, "annotate.hide_src_code")) {
> opt->hide_src_code = perf_config_bool("hide_src_code", value);
> } else if (!strcmp(var, "annotate.jump_arrows")) {
> diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
> index 8b9e05a1932f2f9e..194a05cbc506e4da 100644
> --- a/tools/perf/util/annotate.h
> +++ b/tools/perf/util/annotate.h
> @@ -34,6 +34,9 @@ struct annotated_data_type;
> #define ANNOTATION__BR_CNTR_WIDTH 30
> #define ANNOTATION_DUMMY_LEN 256
>
> +// llvm, capstone, objdump
> +#define MAX_DISASSEMBLERS 3
> +
> struct annotation_options {
> bool hide_src_code,
> use_offset,
> @@ -49,11 +52,14 @@ struct annotation_options {
> annotate_src,
> full_addr;
> u8 offset_level;
> + u8 nr_disassemblers;
> int min_pcnt;
> int max_lines;
> int context;
> char *objdump_path;
> char *disassembler_style;
> + const char *disassemblers_str;
> + const char *disassemblers[MAX_DISASSEMBLERS];
> const char *prefix;
> const char *prefix_strip;
> unsigned int percent_type;
> diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
> index 83df1da20a7b16cd..df6c172c9c7f86d9 100644
> --- a/tools/perf/util/disasm.c
> +++ b/tools/perf/util/disasm.c
> @@ -2210,13 +2210,65 @@ static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
> return err;
> }
>
> +static int annotation_options__init_disassemblers(struct annotation_options *options)
> +{
> + char *disassembler;
> +
> + if (options->disassemblers_str == NULL) {
> + const char *default_disassemblers_str =
> +#ifdef HAVE_LIBLLVM_SUPPORT
> + "llvm,"
> +#endif
> +#ifdef HAVE_LIBCAPSTONE_SUPPORT
> + "capstone,"
> +#endif
> + "objdump";
> +
> + options->disassemblers_str = strdup(default_disassemblers_str);
> + if (!options->disassemblers_str)
> + goto out_enomem;
> + }
> +
> + disassembler = strdup(options->disassemblers_str);
> + if (disassembler == NULL)
> + goto out_enomem;
> +
> + while (1) {
> + char *comma = strchr(disassembler, ',');
> +
> + if (comma != NULL)
> + *comma = '\0';
> +
> + options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
Here is an array assignment.
> +
> + if (comma == NULL)
> + break;
> +
> + disassembler = comma + 1;
> +
> + if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
> + pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
> + MAX_DISASSEMBLERS, disassembler);
> + break;
> + }
The bound check is after the assignment.
> + }
> +
> + return 0;
> +
> +out_enomem:
> + pr_err("Not enough memory for annotate.disassemblers\n");
> + return -1;
> +}
> +
> int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> {
> + struct annotation_options *options = args->options;
> struct map *map = args->ms.map;
> struct dso *dso = map__dso(map);
> char symfs_filename[PATH_MAX];
> bool delete_extract = false;
> struct kcore_extract kce;
> + const char *disassembler;
> bool decomp = false;
> int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
>
> @@ -2276,16 +2328,29 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
> }
> }
>
> - err = symbol__disassemble_llvm(symfs_filename, sym, args);
> - if (err == 0)
> + err = annotation_options__init_disassemblers(options);
symbol__disassemble may be called more than once so
annotation_options__init_disassemblers can be called more than once
yielding a buffer overflow above. I'm working on a fix which removes
some of the fiddliness of using strings.
Thanks,
Ian
> + if (err)
> goto out_remove_tmp;
>
> - err = symbol__disassemble_capstone(symfs_filename, sym, args);
> - if (err == 0)
> - goto out_remove_tmp;
> + err = -1;
>
> - err = symbol__disassemble_objdump(symfs_filename, sym, args);
> + for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
> + disassembler = options->disassemblers[i];
>
> + if (!strcmp(disassembler, "llvm"))
> + err = symbol__disassemble_llvm(symfs_filename, sym, args);
> + else if (!strcmp(disassembler, "capstone"))
> + err = symbol__disassemble_capstone(symfs_filename, sym, args);
> + else if (!strcmp(disassembler, "objdump"))
> + err = symbol__disassemble_objdump(symfs_filename, sym, args);
> + else
> + pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
> + }
> +
> + if (err == 0) {
> + pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
> + disassembler, options->disassemblers_str);
> + }
> out_remove_tmp:
> if (decomp)
> unlink(symfs_filename);
> --
> 2.47.0
>
Powered by blists - more mailing lists