Message-ID: <20250714052143.342851-1-namhyung@kernel.org>
Date: Sun, 13 Jul 2025 22:21:43 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Arnaldo Carvalho de Melo <acme@...nel.org>,
	Ian Rogers <irogers@...gle.com>,
	Kan Liang <kan.liang@...ux.intel.com>
Cc: Jiri Olsa <jolsa@...nel.org>,
	Adrian Hunter <adrian.hunter@...el.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...nel.org>,
	LKML <linux-kernel@...r.kernel.org>,
	linux-perf-users@...r.kernel.org,
	Song Liu <song@...nel.org>,
	bpf@...r.kernel.org
Subject: [PATCH v2] perf ftrace latency: Add -e option to measure time between two events

In addition to function latency, it can measure latencies between events.
Some kernel tracepoints come in pairs and it's meaningful to measure how
long it takes between the two events.  The latency is tracked within the
same thread.

Currently it requires BPF to do the work, but this restriction can be
lifted later.  Instead of having a separate BPF program for each
tracepoint, it uses generic 'event_begin' and 'event_end' programs that
can attach to any (raw) tracepoint.

  $ sudo perf ftrace latency -a -b --hide-empty \
    -e i915_request_wait_begin,i915_request_wait_end -- sleep 1
  #   DURATION     |      COUNT | GRAPH                                |
     256 -  512 us |          4 | ######                               |
       2 -    4 ms |          2 | ###                                  |
       4 -    8 ms |         12 | ###################                  |
       8 -   16 ms |         10 | ################                     |

  # statistics  (in usec)
    total time:               194915
      avg time:                 6961
      max time:                12855
      min time:                  373
         count:                   28
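
The same mechanism should apply to any other paired tracepoints that fire
on the same thread; for instance, per-thread syscall latency could be
measured with the raw_syscalls tracepoints (a hypothetical, untested
invocation, assuming 'sys_enter'/'sys_exit' are usable as raw tracepoints):

  $ sudo perf ftrace latency -a -b \
    -e sys_enter,sys_exit -- sleep 1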

Signed-off-by: Namhyung Kim <namhyung@...nel.org>
---
v2) check the number of events properly  (Ian)

 tools/perf/Documentation/perf-ftrace.txt    |   6 +
 tools/perf/builtin-ftrace.c                 |  50 ++++++-
 tools/perf/util/bpf_ftrace.c                |  75 +++++++---
 tools/perf/util/bpf_skel/func_latency.bpf.c | 148 +++++++++++++-------
 tools/perf/util/ftrace.h                    |   1 +
 5 files changed, 205 insertions(+), 75 deletions(-)

diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b77f58c4d2fdcff9..914457853bcf53ac 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -139,6 +139,12 @@ OPTIONS for 'perf ftrace latency'
 	Set the function name to get the histogram.  Unlike perf ftrace trace,
 	it only allows single function to calculate the histogram.
 
+-e::
+--events=::
+	Set the pair of events to get the histogram.  The histogram is calculated
+	by the time difference between the two events from the same thread.  This
+	requires -b/--use-bpf option.
+
 -b::
 --use-bpf::
 	Use BPF to measure function latency instead of using the ftrace (it
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 3a253a1b9f4526b9..e1f2f3fb1b0850a3 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1549,6 +1549,33 @@ static void delete_filter_func(struct list_head *head)
 	}
 }
 
+static int parse_filter_event(const struct option *opt, const char *str,
+			     int unset __maybe_unused)
+{
+	struct list_head *head = opt->value;
+	struct filter_entry *entry;
+	char *s, *p;
+	int ret = -ENOMEM;
+
+	s = strdup(str);
+	if (s == NULL)
+		return -ENOMEM;
+
+	while ((p = strsep(&s, ",")) != NULL) {
+		entry = malloc(sizeof(*entry) + strlen(p) + 1);
+		if (entry == NULL)
+			goto out;
+
+		strcpy(entry->name, p);
+		list_add_tail(&entry->list, head);
+	}
+	ret = 0;
+
+out:
+	free(s);
+	return ret;
+}
+
 static int parse_buffer_size(const struct option *opt,
 			     const char *str, int unset)
 {
@@ -1711,6 +1738,8 @@ int cmd_ftrace(int argc, const char **argv)
 	const struct option latency_options[] = {
 	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
 		     "Show latency of given function", parse_filter_func),
+	OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2",
+		     "Show latency between the two events", parse_filter_event),
 #ifdef HAVE_BPF_SKEL
 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
 		    "Use BPF to measure function latency"),
@@ -1763,6 +1792,7 @@ int cmd_ftrace(int argc, const char **argv)
 	INIT_LIST_HEAD(&ftrace.notrace);
 	INIT_LIST_HEAD(&ftrace.graph_funcs);
 	INIT_LIST_HEAD(&ftrace.nograph_funcs);
+	INIT_LIST_HEAD(&ftrace.event_pair);
 
 	signal(SIGINT, sig_handler);
 	signal(SIGUSR1, sig_handler);
@@ -1817,9 +1847,24 @@ int cmd_ftrace(int argc, const char **argv)
 		cmd_func = __cmd_ftrace;
 		break;
 	case PERF_FTRACE_LATENCY:
-		if (list_empty(&ftrace.filters)) {
-			pr_err("Should provide a function to measure\n");
+		if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) {
+			pr_err("Should provide a function or events to measure\n");
 			parse_options_usage(ftrace_usage, options, "T", 1);
+			parse_options_usage(NULL, options, "e", 1);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) {
+			pr_err("Please specify either of function or events\n");
+			parse_options_usage(ftrace_usage, options, "T", 1);
+			parse_options_usage(NULL, options, "e", 1);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) {
+			pr_err("Event processing needs BPF\n");
+			parse_options_usage(ftrace_usage, options, "b", 1);
+			parse_options_usage(NULL, options, "e", 1);
 			ret = -EINVAL;
 			goto out_delete_filters;
 		}
@@ -1910,6 +1955,7 @@ int cmd_ftrace(int argc, const char **argv)
 	delete_filter_func(&ftrace.notrace);
 	delete_filter_func(&ftrace.graph_funcs);
 	delete_filter_func(&ftrace.nograph_funcs);
+	delete_filter_func(&ftrace.event_pair);
 
 	return ret;
 }
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 7324668cc83e747e..0cb02412043c9196 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -21,15 +21,26 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 {
 	int fd, err;
 	int i, ncpus = 1, ntasks = 1;
-	struct filter_entry *func;
+	struct filter_entry *func = NULL;
 
-	if (!list_is_singular(&ftrace->filters)) {
-		pr_err("ERROR: %s target function(s).\n",
-		       list_empty(&ftrace->filters) ? "No" : "Too many");
-		return -1;
-	}
+	if (!list_empty(&ftrace->filters)) {
+		if (!list_is_singular(&ftrace->filters)) {
+			pr_err("ERROR: Too many target functions.\n");
+			return -1;
+		}
+		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+	} else {
+		int count = 0;
+		struct list_head *pos;
 
-	func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+		list_for_each(pos, &ftrace->event_pair)
+			count++;
+
+		if (count != 2) {
+			pr_err("ERROR: Needs two target events.\n");
+			return -1;
+		}
+	}
 
 	skel = func_latency_bpf__open();
 	if (!skel) {
@@ -93,20 +104,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 
 	skel->bss->min = INT64_MAX;
 
-	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
-							    false, func->name);
-	if (IS_ERR(skel->links.func_begin)) {
-		pr_err("Failed to attach fentry program\n");
-		err = PTR_ERR(skel->links.func_begin);
-		goto out;
-	}
+	if (func) {
+		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+								    false, func->name);
+		if (IS_ERR(skel->links.func_begin)) {
+			pr_err("Failed to attach fentry program\n");
+			err = PTR_ERR(skel->links.func_begin);
+			goto out;
+		}
 
-	skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
-							  true, func->name);
-	if (IS_ERR(skel->links.func_end)) {
-		pr_err("Failed to attach fexit program\n");
-		err = PTR_ERR(skel->links.func_end);
-		goto out;
+		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+								  true, func->name);
+		if (IS_ERR(skel->links.func_end)) {
+			pr_err("Failed to attach fexit program\n");
+			err = PTR_ERR(skel->links.func_end);
+			goto out;
+		}
+	} else {
+		struct filter_entry *event;
+
+		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
+
+		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
+									     event->name);
+		if (IS_ERR(skel->links.event_begin)) {
+			pr_err("Failed to attach first tracepoint program\n");
+			err = PTR_ERR(skel->links.event_begin);
+			goto out;
+		}
+
+		event = list_next_entry(event, list);
+
+		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
+									     event->name);
+		if (IS_ERR(skel->links.event_end)) {
+			pr_err("Failed to attach second tracepoint program\n");
+			err = PTR_ERR(skel->links.event_end);
+			goto out;
+		}
 	}
 
 	/* XXX: we don't actually use this fd - just for poll() */
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index e731a79a753a4d2d..621e2022c8bc9648 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -52,34 +52,89 @@ const volatile unsigned int min_latency;
 const volatile unsigned int max_latency;
 const volatile unsigned int bucket_num = NUM_BUCKET;
 
-SEC("kprobe/func")
-int BPF_PROG(func_begin)
+static bool can_record(void)
 {
-	__u64 key, now;
-
-	if (!enabled)
-		return 0;
-
-	key = bpf_get_current_pid_tgid();
-
 	if (has_cpu) {
 		__u32 cpu = bpf_get_smp_processor_id();
 		__u8 *ok;
 
 		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
 		if (!ok)
-			return 0;
+			return false;
 	}
 
 	if (has_task) {
-		__u32 pid = key & 0xffffffff;
+		__u32 pid = bpf_get_current_pid_tgid();
 		__u8 *ok;
 
 		ok = bpf_map_lookup_elem(&task_filter, &pid);
 		if (!ok)
-			return 0;
+			return false;
 	}
+	return true;
+}
+
+static void update_latency(__s64 delta)
+{
+	__u64 val = delta;
+	__u32 key = 0;
+	__u64 *hist;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
+
+	if (delta < 0)
+		return;
 
+	if (bucket_range != 0) {
+		val = delta / cmp_base;
+
+		if (min_latency > 0) {
+			if (val > min_latency)
+				val -= min_latency;
+			else
+				goto do_lookup;
+		}
+
+		// Less than 1 unit (ms or ns), or, in the future,
+		// than the min latency desired.
+		if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+			key = val / bucket_range + 1;
+			if (key >= bucket_num)
+				key = bucket_num - 1;
+		}
+
+		goto do_lookup;
+	}
+	// calculate index using delta
+	for (key = 0; key < (bucket_num - 1); key++) {
+		if (delta < (cmp_base << key))
+			break;
+	}
+
+do_lookup:
+	hist = bpf_map_lookup_elem(&latency, &key);
+	if (!hist)
+		return;
+
+	__sync_fetch_and_add(hist, 1);
+
+	__sync_fetch_and_add(&total, delta); // always in nsec
+	__sync_fetch_and_add(&count, 1);
+
+	if (delta > max)
+		max = delta;
+	if (delta < min)
+		min = delta;
+}
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+	__u64 key, now;
+
+	if (!enabled || !can_record())
+		return 0;
+
+	key = bpf_get_current_pid_tgid();
 	now = bpf_ktime_get_ns();
 
 	// overwrite timestamp for nested functions
@@ -92,7 +147,6 @@ int BPF_PROG(func_end)
 {
 	__u64 tid;
 	__u64 *start;
-	__u64 cmp_base = use_nsec ? 1 : 1000;
 
 	if (!enabled)
 		return 0;
@@ -101,56 +155,44 @@ int BPF_PROG(func_end)
 
 	start = bpf_map_lookup_elem(&functime, &tid);
 	if (start) {
-		__s64 delta = bpf_ktime_get_ns() - *start;
-		__u64 val = delta;
-		__u32 key = 0;
-		__u64 *hist;
-
+		update_latency(bpf_ktime_get_ns() - *start);
 		bpf_map_delete_elem(&functime, &tid);
+	}
 
-		if (delta < 0)
-			return 0;
+	return 0;
+}
 
-		if (bucket_range != 0) {
-			val = delta / cmp_base;
+SEC("raw_tp")
+int BPF_PROG(event_begin)
+{
+	__u64 key, now;
 
-			if (min_latency > 0) {
-				if (val > min_latency)
-					val -= min_latency;
-				else
-					goto do_lookup;
-			}
+	if (!enabled || !can_record())
+		return 0;
 
-			// Less than 1 unit (ms or ns), or, in the future,
-			// than the min latency desired.
-			if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
-				key = val / bucket_range + 1;
-				if (key >= bucket_num)
-					key = bucket_num - 1;
-			}
+	key = bpf_get_current_pid_tgid();
+	now = bpf_ktime_get_ns();
 
-			goto do_lookup;
-		}
-		// calculate index using delta
-		for (key = 0; key < (bucket_num - 1); key++) {
-			if (delta < (cmp_base << key))
-				break;
-		}
+	// overwrite timestamp for nested events
+	bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+	return 0;
+}
 
-do_lookup:
-		hist = bpf_map_lookup_elem(&latency, &key);
-		if (!hist)
-			return 0;
+SEC("raw_tp")
+int BPF_PROG(event_end)
+{
+	__u64 tid;
+	__u64 *start;
 
-		__sync_fetch_and_add(hist, 1);
+	if (!enabled)
+		return 0;
 
-		__sync_fetch_and_add(&total, delta); // always in nsec
-		__sync_fetch_and_add(&count, 1);
+	tid = bpf_get_current_pid_tgid();
 
-		if (delta > max)
-			max = delta;
-		if (delta < min)
-			min = delta;
+	start = bpf_map_lookup_elem(&functime, &tid);
+	if (start) {
+		update_latency(bpf_ktime_get_ns() - *start);
+		bpf_map_delete_elem(&functime, &tid);
 	}
 
 	return 0;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index a9bc47da83a56cd6..3f5094ac59080310 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
 	struct list_head	notrace;
 	struct list_head	graph_funcs;
 	struct list_head	nograph_funcs;
+	struct list_head	event_pair;
 	struct hashmap		*profile_hash;
 	unsigned long		percpu_buffer_size;
 	bool			inherit;
-- 
2.50.0.727.gbf7dc18ff4-goog

