[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3184a838-9784-eb86-09ad-ceadabcd3381@arm.com>
Date: Mon, 8 Feb 2021 17:39:46 +0200
From: James Clark <james.clark@....com>
To: Alexandre Truong <alexandre.truong@....com>,
linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org
Cc: John Garry <john.garry@...wei.com>, Will Deacon <will@...nel.org>,
Mathieu Poirier <mathieu.poirier@...aro.org>,
Leo Yan <leo.yan@...aro.org>,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Kemeng Shi <shikemeng@...wei.com>,
Ian Rogers <irogers@...gle.com>,
Andi Kleen <ak@...ux.intel.com>,
Kan Liang <kan.liang@...ux.intel.com>,
Jin Yao <yao.jin@...ux.intel.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Suzuki K Poulose <suzuki.poulose@....com>,
Al Grant <al.grant@....com>,
Wilco Dijkstra <wilco.dijkstra@....com>
Subject: Re: [PATCH 4/4] perf tools: determine if LR is the return address
On 22/01/2021 18:18, Alexandre Truong wrote:
> +}
> +
> +static int add_entry(struct unwind_entry *entry, void *arg)
> +{
> + struct entries *entries = arg;
> +
> + entries->stack[entries->i++] = entry->ip;
> + return 0;
> +}
> +
> +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
> +{
> + u64 leaf_frame;
> + struct entries entries = {{0, 0}, 0};
> +
> + if (get_leaf_frame_caller_enabled(sample))
> + return 0;
> +
> + unwind__get_entries(add_entry, &entries, thread, sample, 2);
> + leaf_frame = callchain_param.order == ORDER_CALLER ?
> + entries.stack[0] : entries.stack[1];
> +
> + if (leaf_frame + 1 == sample->user_regs.regs[PERF_REG_ARM64_LR])
> + return sample->user_regs.regs[PERF_REG_ARM64_LR];
Hi Alex,
From your other reply about your investigation, it looks like the check against PERF_REG_ARM64_LR isn't
required, because libunwind won't return a value if it's not correct, whether or not it's equal to the LR.
Also, PERF_REG_ARM64_LR points to the instruction _after_ the call site, i.e. where to return to,
not where the call was made from. So just leaf_frame rather than leaf_frame+1 would be more accurate.
I was also looking at unwind_entry in machine.c which is similar to your add_entry function and saw that it
does some extra bits like this:
if (symbol_conf.hide_unresolved && entry->ms.sym == NULL)
return 0;
if (append_inlines(cursor, &entry->ms, entry->ip) == 0)
return 0;
/*
* Convert entry->ip from a virtual address to an offset in
* its corresponding binary.
*/
if (entry->ms.map)
addr = map__map_ip(entry->ms.map, entry->ip);
I have a feeling you will also need to apply those same steps to the values returned from libunwind to make
it 100% equivalent.
James
> + return 0;
> +}
> diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
> new file mode 100644
> index 000000000000..16dc03fa9abe
> --- /dev/null
> +++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
> @@ -0,0 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> +
> +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
> +
> +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 40082d70eec1..bc6147e46c89 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -34,6 +34,7 @@
> #include "bpf-event.h"
> #include <internal/lib.h> // page_size
> #include "cgroup.h"
> +#include "arm-frame-pointer-unwind-support.h"
>
> #include <linux/ctype.h>
> #include <symbol/kallsyms.h>
> @@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
> return err;
> }
>
> -static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
> - struct thread *thread __maybe_unused)
> +static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
> {
> - return 0;
> + if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0)
> + return get_leaf_frame_caller_aarch64(sample, thread);
> + else
> + return 0;
> }
>
> static int thread__resolve_callchain_sample(struct thread *thread,
>
Powered by blists - more mailing lists