Message-ID: <CAP-5=fXzbqoqV2YUbVV_BMVNrwtddi65=YrbqD6hMaywNxU53A@mail.gmail.com>
Date: Fri, 11 Jul 2025 17:34:50 -0700
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>, Kan Liang <kan.liang@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
LKML <linux-kernel@...r.kernel.org>, linux-perf-users@...r.kernel.org,
Song Liu <song@...nel.org>, bpf@...r.kernel.org
Subject: Re: [PATCH] perf ftrace latency: Add -e option to measure time
between two events
On Fri, Jul 11, 2025 at 3:35 PM Namhyung Kim <namhyung@...nel.org> wrote:
>
> In addition to function latency, it can measure event latencies.
> Some kernel tracepoints are paired and it's meaningful to measure how
> long it takes between the two events. The latency is tracked within the
> same thread.
>
> Currently it only uses BPF to do the work but this restriction can be
> lifted later. Instead of having a separate BPF program for each
> tracepoint, it only uses generic 'event_begin' and 'event_end' programs
> that can attach to any (raw) tracepoint.
>
> $ sudo perf ftrace latency -a -b --hide-empty \
> -e i915_request_wait_begin,i915_request_wait_end -- sleep 1
> # DURATION | COUNT | GRAPH |
> 256 - 512 us | 4 | ###### |
> 2 - 4 ms | 2 | ### |
> 4 - 8 ms | 12 | ################### |
> 8 - 16 ms | 10 | ################ |
>
> # statistics (in usec)
> total time: 194915
> avg time: 6961
> max time: 12855
> min time: 373
> count: 28
Nice!
> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> ---
> tools/perf/Documentation/perf-ftrace.txt | 6 +
> tools/perf/builtin-ftrace.c | 50 ++++++-
> tools/perf/util/bpf_ftrace.c | 69 ++++++---
> tools/perf/util/bpf_skel/func_latency.bpf.c | 148 +++++++++++++-------
> tools/perf/util/ftrace.h | 1 +
> 5 files changed, 199 insertions(+), 75 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
> index b77f58c4d2fdcff9..914457853bcf53ac 100644
> --- a/tools/perf/Documentation/perf-ftrace.txt
> +++ b/tools/perf/Documentation/perf-ftrace.txt
> @@ -139,6 +139,12 @@ OPTIONS for 'perf ftrace latency'
> Set the function name to get the histogram. Unlike perf ftrace trace,
> it only allows single function to calculate the histogram.
>
> +-e::
> +--events=::
> + Set the pair of events to get the histogram. The histogram is calculated
> + by the time difference between the two events from the same thread. This
> + requires -b/--use-bpf option.
> +
> -b::
> --use-bpf::
> Use BPF to measure function latency instead of using the ftrace (it
> diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> index 3a253a1b9f4526b9..e1f2f3fb1b0850a3 100644
> --- a/tools/perf/builtin-ftrace.c
> +++ b/tools/perf/builtin-ftrace.c
> @@ -1549,6 +1549,33 @@ static void delete_filter_func(struct list_head *head)
> }
> }
>
> +static int parse_filter_event(const struct option *opt, const char *str,
> + int unset __maybe_unused)
> +{
> + struct list_head *head = opt->value;
> + struct filter_entry *entry;
> + char *s, *p;
> + int ret = -ENOMEM;
> +
> + s = strdup(str);
> + if (s == NULL)
> + return -ENOMEM;
> +
> + while ((p = strsep(&s, ",")) != NULL) {
> + entry = malloc(sizeof(*entry) + strlen(p) + 1);
> + if (entry == NULL)
> + goto out;
> +
> + strcpy(entry->name, p);
> + list_add_tail(&entry->list, head);
> + }
> + ret = 0;
> +
> +out:
> + free(s);
> + return ret;
> +}
> +
> static int parse_buffer_size(const struct option *opt,
> const char *str, int unset)
> {
> @@ -1711,6 +1738,8 @@ int cmd_ftrace(int argc, const char **argv)
> const struct option latency_options[] = {
> OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
> "Show latency of given function", parse_filter_func),
> + OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2",
> + "Show latency between the two events", parse_filter_event),
> #ifdef HAVE_BPF_SKEL
> OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
> "Use BPF to measure function latency"),
> @@ -1763,6 +1792,7 @@ int cmd_ftrace(int argc, const char **argv)
> INIT_LIST_HEAD(&ftrace.notrace);
> INIT_LIST_HEAD(&ftrace.graph_funcs);
> INIT_LIST_HEAD(&ftrace.nograph_funcs);
> + INIT_LIST_HEAD(&ftrace.event_pair);
>
> signal(SIGINT, sig_handler);
> signal(SIGUSR1, sig_handler);
> @@ -1817,9 +1847,24 @@ int cmd_ftrace(int argc, const char **argv)
> cmd_func = __cmd_ftrace;
> break;
> case PERF_FTRACE_LATENCY:
> - if (list_empty(&ftrace.filters)) {
> - pr_err("Should provide a function to measure\n");
> + if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) {
> + pr_err("Should provide a function or events to measure\n");
> parse_options_usage(ftrace_usage, options, "T", 1);
> + parse_options_usage(NULL, options, "e", 1);
> + ret = -EINVAL;
> + goto out_delete_filters;
> + }
> + if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) {
> + pr_err("Please specify either of function or events\n");
> + parse_options_usage(ftrace_usage, options, "T", 1);
> + parse_options_usage(NULL, options, "e", 1);
> + ret = -EINVAL;
> + goto out_delete_filters;
> + }
> + if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) {
> + pr_err("Event processing needs BPF\n");
> + parse_options_usage(ftrace_usage, options, "b", 1);
> + parse_options_usage(NULL, options, "e", 1);
> ret = -EINVAL;
> goto out_delete_filters;
> }
> @@ -1910,6 +1955,7 @@ int cmd_ftrace(int argc, const char **argv)
> delete_filter_func(&ftrace.notrace);
> delete_filter_func(&ftrace.graph_funcs);
> delete_filter_func(&ftrace.nograph_funcs);
> + delete_filter_func(&ftrace.event_pair);
>
> return ret;
> }
> diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
> index 7324668cc83e747e..34ac0adf841c27ee 100644
> --- a/tools/perf/util/bpf_ftrace.c
> +++ b/tools/perf/util/bpf_ftrace.c
> @@ -21,16 +21,21 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
> {
> int fd, err;
> int i, ncpus = 1, ntasks = 1;
> - struct filter_entry *func;
> + struct filter_entry *func = NULL;
>
> - if (!list_is_singular(&ftrace->filters)) {
> - pr_err("ERROR: %s target function(s).\n",
> - list_empty(&ftrace->filters) ? "No" : "Too many");
> - return -1;
> + if (!list_empty(&ftrace->filters)) {
> + if (!list_is_singular(&ftrace->filters)) {
> + pr_err("ERROR: Too many target functions.\n");
> + return -1;
> + }
> + func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> + } else {
> + if (list_is_singular(&ftrace->event_pair)) {
Should there be a list length function and then a check that the length == 2
here? An empty list or a list of length >2 will otherwise pass. Something
like the sketch below, perhaps.
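
(Rough, untested sketch; the filter_list_length helper name is made up,
there may already be a suitable counting helper to reuse instead.)

  static int filter_list_length(struct list_head *head)
  {
          struct filter_entry *entry;
          int len = 0;

          list_for_each_entry(entry, head, list)
                  len++;
          return len;
  }
  ...
  } else {
          if (filter_list_length(&ftrace->event_pair) != 2) {
                  pr_err("ERROR: Needs exactly two target events.\n");
                  return -1;
          }
  }
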
Thanks,
Ian
> + pr_err("ERROR: Needs two target events.\n");
> + return -1;
> + }
> }
>
> - func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> -
> skel = func_latency_bpf__open();
> if (!skel) {
> pr_err("Failed to open func latency skeleton\n");
> @@ -93,20 +98,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
>
> skel->bss->min = INT64_MAX;
>
> - skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
> - false, func->name);
> - if (IS_ERR(skel->links.func_begin)) {
> - pr_err("Failed to attach fentry program\n");
> - err = PTR_ERR(skel->links.func_begin);
> - goto out;
> - }
> + if (func) {
> + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
> + false, func->name);
> + if (IS_ERR(skel->links.func_begin)) {
> + pr_err("Failed to attach fentry program\n");
> + err = PTR_ERR(skel->links.func_begin);
> + goto out;
> + }
>
> - skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
> - true, func->name);
> - if (IS_ERR(skel->links.func_end)) {
> - pr_err("Failed to attach fexit program\n");
> - err = PTR_ERR(skel->links.func_end);
> - goto out;
> + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
> + true, func->name);
> + if (IS_ERR(skel->links.func_end)) {
> + pr_err("Failed to attach fexit program\n");
> + err = PTR_ERR(skel->links.func_end);
> + goto out;
> + }
> + } else {
> + struct filter_entry *event;
> +
> + event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
> +
> + skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
> + event->name);
> + if (IS_ERR(skel->links.event_begin)) {
> + pr_err("Failed to attach first tracepoint program\n");
> + err = PTR_ERR(skel->links.event_begin);
> + goto out;
> + }
> +
> + event = list_next_entry(event, list);
> +
> + skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
> + event->name);
> + if (IS_ERR(skel->links.event_end)) {
> + pr_err("Failed to attach second tracepoint program\n");
> + err = PTR_ERR(skel->links.event_end);
> + goto out;
> + }
> }
>
> /* XXX: we don't actually use this fd - just for poll() */
> diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
> index e731a79a753a4d2d..621e2022c8bc9648 100644
> --- a/tools/perf/util/bpf_skel/func_latency.bpf.c
> +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
> @@ -52,34 +52,89 @@ const volatile unsigned int min_latency;
> const volatile unsigned int max_latency;
> const volatile unsigned int bucket_num = NUM_BUCKET;
>
> -SEC("kprobe/func")
> -int BPF_PROG(func_begin)
> +static bool can_record(void)
> {
> - __u64 key, now;
> -
> - if (!enabled)
> - return 0;
> -
> - key = bpf_get_current_pid_tgid();
> -
> if (has_cpu) {
> __u32 cpu = bpf_get_smp_processor_id();
> __u8 *ok;
>
> ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
> if (!ok)
> - return 0;
> + return false;
> }
>
> if (has_task) {
> - __u32 pid = key & 0xffffffff;
> + __u32 pid = bpf_get_current_pid_tgid();
> __u8 *ok;
>
> ok = bpf_map_lookup_elem(&task_filter, &pid);
> if (!ok)
> - return 0;
> + return false;
> }
> + return true;
> +}
> +
> +static void update_latency(__s64 delta)
> +{
> + __u64 val = delta;
> + __u32 key = 0;
> + __u64 *hist;
> + __u64 cmp_base = use_nsec ? 1 : 1000;
> +
> + if (delta < 0)
> + return;
>
> + if (bucket_range != 0) {
> + val = delta / cmp_base;
> +
> + if (min_latency > 0) {
> + if (val > min_latency)
> + val -= min_latency;
> + else
> + goto do_lookup;
> + }
> +
> + // Less than 1 unit (ms or ns), or, in the future,
> + // than the min latency desired.
> + if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
> + key = val / bucket_range + 1;
> + if (key >= bucket_num)
> + key = bucket_num - 1;
> + }
> +
> + goto do_lookup;
> + }
> + // calculate index using delta
> + for (key = 0; key < (bucket_num - 1); key++) {
> + if (delta < (cmp_base << key))
> + break;
> + }
> +
> +do_lookup:
> + hist = bpf_map_lookup_elem(&latency, &key);
> + if (!hist)
> + return;
> +
> + __sync_fetch_and_add(hist, 1);
> +
> + __sync_fetch_and_add(&total, delta); // always in nsec
> + __sync_fetch_and_add(&count, 1);
> +
> + if (delta > max)
> + max = delta;
> + if (delta < min)
> + min = delta;
> +}
> +
> +SEC("kprobe/func")
> +int BPF_PROG(func_begin)
> +{
> + __u64 key, now;
> +
> + if (!enabled || !can_record())
> + return 0;
> +
> + key = bpf_get_current_pid_tgid();
> now = bpf_ktime_get_ns();
>
> // overwrite timestamp for nested functions
> @@ -92,7 +147,6 @@ int BPF_PROG(func_end)
> {
> __u64 tid;
> __u64 *start;
> - __u64 cmp_base = use_nsec ? 1 : 1000;
>
> if (!enabled)
> return 0;
> @@ -101,56 +155,44 @@ int BPF_PROG(func_end)
>
> start = bpf_map_lookup_elem(&functime, &tid);
> if (start) {
> - __s64 delta = bpf_ktime_get_ns() - *start;
> - __u64 val = delta;
> - __u32 key = 0;
> - __u64 *hist;
> -
> + update_latency(bpf_ktime_get_ns() - *start);
> bpf_map_delete_elem(&functime, &tid);
> + }
>
> - if (delta < 0)
> - return 0;
> + return 0;
> +}
>
> - if (bucket_range != 0) {
> - val = delta / cmp_base;
> +SEC("raw_tp")
> +int BPF_PROG(event_begin)
> +{
> + __u64 key, now;
>
> - if (min_latency > 0) {
> - if (val > min_latency)
> - val -= min_latency;
> - else
> - goto do_lookup;
> - }
> + if (!enabled || !can_record())
> + return 0;
>
> - // Less than 1 unit (ms or ns), or, in the future,
> - // than the min latency desired.
> - if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
> - key = val / bucket_range + 1;
> - if (key >= bucket_num)
> - key = bucket_num - 1;
> - }
> + key = bpf_get_current_pid_tgid();
> + now = bpf_ktime_get_ns();
>
> - goto do_lookup;
> - }
> - // calculate index using delta
> - for (key = 0; key < (bucket_num - 1); key++) {
> - if (delta < (cmp_base << key))
> - break;
> - }
> + // overwrite timestamp for nested events
> + bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
> + return 0;
> +}
>
> -do_lookup:
> - hist = bpf_map_lookup_elem(&latency, &key);
> - if (!hist)
> - return 0;
> +SEC("raw_tp")
> +int BPF_PROG(event_end)
> +{
> + __u64 tid;
> + __u64 *start;
>
> - __sync_fetch_and_add(hist, 1);
> + if (!enabled)
> + return 0;
>
> - __sync_fetch_and_add(&total, delta); // always in nsec
> - __sync_fetch_and_add(&count, 1);
> + tid = bpf_get_current_pid_tgid();
>
> - if (delta > max)
> - max = delta;
> - if (delta < min)
> - min = delta;
> + start = bpf_map_lookup_elem(&functime, &tid);
> + if (start) {
> + update_latency(bpf_ktime_get_ns() - *start);
> + bpf_map_delete_elem(&functime, &tid);
> }
>
> return 0;
> diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
> index a9bc47da83a56cd6..3f5094ac59080310 100644
> --- a/tools/perf/util/ftrace.h
> +++ b/tools/perf/util/ftrace.h
> @@ -17,6 +17,7 @@ struct perf_ftrace {
> struct list_head notrace;
> struct list_head graph_funcs;
> struct list_head nograph_funcs;
> + struct list_head event_pair;
> struct hashmap *profile_hash;
> unsigned long percpu_buffer_size;
> bool inherit;
> --
> 2.50.0.727.gbf7dc18ff4-goog
>