Message-ID: <20251118003531.944170593@kernel.org>
Date: Mon, 17 Nov 2025 19:29:53 -0500
From: Steven Rostedt <rostedt@...nel.org>
To: linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ian Rogers <irogers@...gle.com>,
Namhyung Kim <namhyung@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Jiri Olsa <jolsa@...nel.org>,
Douglas Raillard <douglas.raillard@....com>
Subject: [POC][RFC][PATCH 3/3] fgraph: Add perf counters to function graph tracer
From: Steven Rostedt <rostedt@...dmis.org>

Add options to trigger perf events in the function graph tracer.

Two new options are added: funcgraph-cpu-cycles and funcgraph-cache-misses.

When one of these options is set, the perf event is recorded right after the
start of a function and again just before the end of the function.
# cd /sys/kernel/tracing
# echo 1 > options/funcgraph-cache-misses
# echo vfs_read > set_graph_function
# echo function_graph > current_tracer
# cat trace
[..]
 5)               |  vfs_read() {
 5)               |    /* cache_misses: 822565 */
 5)               |    rw_verify_area() {
 5)               |      /* cache_misses: 824003 */
 5)               |      security_file_permission() {
 5)               |        /* cache_misses: 825440 */
 5)               |        apparmor_file_permission() {
 5)               |          /* cache_misses: 826875 */
 5)               |          aa_file_perm() {
 5)               |            /* cache_misses: 828326 */
 5)               |            __rcu_read_lock() {
 5)               |              /* cache_misses: 829766 */
 5)               |              /* cache_misses: 830785 */
 5)   5.116 us    |            }
 5)               |            __rcu_read_unlock() {
 5)               |              /* cache_misses: 832611 */
 5)               |              /* cache_misses: 833632 */
 5)   5.223 us    |            }
 5)               |            /* cache_misses: 835043 */
 5) + 25.462 us   |          }
 5)               |          /* cache_misses: 836454 */
 5) + 35.518 us   |        }
 5)               |        bpf_lsm_file_permission() {
 5)               |          /* cache_misses: 838276 */
 5)               |          /* cache_misses: 839292 */
 5)   4.613 us    |        }
 5)               |        /* cache_misses: 840697 */
 5) + 54.684 us   |      }
 5)               |      /* cache_misses: 842107 */
 5) + 64.449 us   |    }

The option causes the perf event to be triggered for every function that is
traced.
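
Since the raw counter value is recorded at both the entry and the exit of a
function, the difference between the two annotations gives a rough estimate
of the events that occurred within that function (including the overhead of
the tracing itself). For example, in the output above __rcu_read_lock() goes
from 829766 to 830785, i.e. roughly 830785 - 829766 = 1019 cache misses
across the call.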

If funcgraph-cpu-cycles is also enabled:
# echo 1 > options/funcgraph-cpu-cycles
# cat trace
[..]
 3)               |  vfs_read() {
 3)               |    /* cpu_cycles: 2947481793 cache_misses: 2002984031 */
 3)               |    rw_verify_area() {
 3)               |      /* cpu_cycles: 2947488061 cache_misses: 2002985922 */
 3)               |      security_file_permission() {
 3)               |        /* cpu_cycles: 2947492867 cache_misses: 2002987812 */
 3)               |        apparmor_file_permission() {
 3)               |          /* cpu_cycles: 2947497713 cache_misses: 2002989700 */
 3)               |          aa_file_perm() {
 3)               |            /* cpu_cycles: 2947502560 cache_misses: 2002991604 */
 3)               |            __rcu_read_lock() {
 3)               |              /* cpu_cycles: 2947507398 cache_misses: 2002993497 */
 3)               |              /* cpu_cycles: 2947512435 cache_misses: 2002994969 */
 3)   7.586 us    |            }
 3)               |            __rcu_read_unlock() {
 3)               |              /* cpu_cycles: 2947518226 cache_misses: 2002997248 */
 3)               |              /* cpu_cycles: 2947522328 cache_misses: 2002998722 */
 3)   7.211 us    |            }
 3)               |            /* cpu_cycles: 2947527067 cache_misses: 2003000586 */
 3) + 37.581 us   |          }
 3)               |          /* cpu_cycles: 2947531727 cache_misses: 2003002450 */
 3) + 52.061 us   |        }
 3)               |        bpf_lsm_file_permission() {
 3)               |          /* cpu_cycles: 2947537274 cache_misses: 2003004725 */
 3)               |          /* cpu_cycles: 2947541104 cache_misses: 2003006194 */
 3)   7.029 us    |        }
 3)               |        /* cpu_cycles: 2947545762 cache_misses: 2003008052 */
 3) + 80.971 us   |      }
 3)               |      /* cpu_cycles: 2947550459 cache_misses: 2003009915 */
 3) + 95.515 us   |    }
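
The same per-function delta can be read off for cycles as well: across
__rcu_read_lock() above, cpu_cycles advances from 2947507398 to 2947512435,
i.e. roughly 5037 cycles including the tracing overhead.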
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
 kernel/trace/trace.h                 |   4 +
 kernel/trace/trace_functions_graph.c | 117 +++++++++++++++++++++++++--
 2 files changed, 116 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index bb764a2255c7..64cdb6fda3fb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -366,7 +366,9 @@ struct trace_array {
 	int perf_events;
 	int ftrace_perf_events;
+	int fgraph_perf_events;
 	u64 ftrace_perf_mask;
+	u64 fgraph_perf_mask;
 	struct trace_pid_list __rcu *filtered_pids;
 	struct trace_pid_list __rcu *filtered_no_pids;
@@ -946,6 +948,8 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
 #define TRACE_GRAPH_PRINT_RETVAL_HEX	0x1000
 #define TRACE_GRAPH_PRINT_RETADDR	0x2000
 #define TRACE_GRAPH_ARGS		0x4000
+#define TRACE_GRAPH_PERF_CACHE		0x8000
+#define TRACE_GRAPH_PERF_CYCLES		0x10000
 #define TRACE_GRAPH_PRINT_FILL_SHIFT	28
 #define TRACE_GRAPH_PRINT_FILL_MASK	(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 44d5dc5031e2..e618dd12ca0c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -22,6 +22,8 @@ static int ftrace_graph_skip_irqs;
 /* Do not record function time when task is sleeping */
 unsigned int fgraph_no_sleep_time;
+static struct tracer graph_trace;
+
 struct fgraph_cpu_data {
 	pid_t last_pid;
 	int depth;
@@ -88,6 +90,11 @@ static struct tracer_opt trace_opts[] = {
 	/* Include sleep time (scheduled out) between entry and return */
 	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
+#ifdef CONFIG_PERF_EVENTS
+	{ TRACER_OPT(funcgraph-cache-misses, TRACE_GRAPH_PERF_CACHE) },
+	{ TRACER_OPT(funcgraph-cpu-cycles, TRACE_GRAPH_PERF_CYCLES) },
+#endif
+
 	{ } /* Empty entry */
 };
@@ -104,6 +111,97 @@ static bool tracer_flags_is_set(struct trace_array *tr, u32 flags)
 	return (tr->current_trace_flags->val & flags) == flags;
 }
+#ifdef CONFIG_PERF_EVENTS
+static inline void handle_perf_event(struct trace_array *tr, unsigned int trace_ctx)
+{
+	if (!tr->fgraph_perf_events)
+		return;
+	ftrace_perf_events(tr, tr->fgraph_perf_events, tr->fgraph_perf_mask, trace_ctx);
+}
+
+static int ftrace_graph_perf_event(struct trace_array *tr, int set, int bit)
+{
+	u64 mask;
+	int type;
+	int ret = 0;
+
+	/* Do nothing if the current tracer is not this tracer */
+	if (tr->current_trace != &graph_trace)
+		return 0;
+
+	switch (bit) {
+	case TRACE_GRAPH_PERF_CACHE:
+		mask = TRACE_ITER(PERF_CACHE);
+		type = PERF_TRACE_CACHE;
+		break;
+	case TRACE_GRAPH_PERF_CYCLES:
+		mask = TRACE_ITER(PERF_CYCLES);
+		type = PERF_TRACE_CYCLES;
+		break;
+	}
+
+	if (set)
+		ret = trace_perf_event_enable(type);
+	else
+		trace_perf_event_disable(type);
+
+	if (ret < 0)
+		return ret;
+
+	if (set) {
+		tr->fgraph_perf_events++;
+		tr->fgraph_perf_mask |= mask;
+	} else {
+		tr->fgraph_perf_mask &= ~mask;
+		tr->fgraph_perf_events--;
+	}
+	return 0;
+}
+
+static void ftrace_graph_perf_enable(struct trace_array *tr, int bit)
+{
+	int err;
+
+	if (!(tr->current_trace_flags->val & bit))
+		return;
+
+	err = ftrace_graph_perf_event(tr, 1, bit);
+	if (err < 0)
+		tr->current_trace_flags->val &= ~bit;
+}
+
+static void ftrace_graph_perf_disable(struct trace_array *tr, int bit)
+{
+	/* Only disable if it was enabled */
+	if (!(tr->current_trace_flags->val & bit))
+		return;
+
+	ftrace_graph_perf_event(tr, 0, bit);
+}
+
+static void fgraph_perf_init(struct trace_array *tr)
+{
+	ftrace_graph_perf_enable(tr, TRACE_GRAPH_PERF_CYCLES);
+	ftrace_graph_perf_enable(tr, TRACE_GRAPH_PERF_CACHE);
+}
+
+static void fgraph_perf_reset(struct trace_array *tr)
+{
+	ftrace_graph_perf_disable(tr, TRACE_GRAPH_PERF_CYCLES);
+	ftrace_graph_perf_disable(tr, TRACE_GRAPH_PERF_CACHE);
+}
+#else
+static inline void handle_perf_event(struct trace_array *tr, unsigned int trace_ctx)
+{
+}
+static inline void fgraph_perf_init(struct trace_array *tr)
+{
+}
+static inline void fgraph_perf_reset(struct trace_array *tr)
+{
+}
+#endif
+
 /*
  * DURATION column is being also used to display IRQ signs,
  * following values are used by print_graph_irq and others
@@ -272,6 +370,9 @@ static int graph_entry(struct ftrace_graph_ent *trace,
 		ret = __graph_entry(tr, trace, trace_ctx, fregs);
 	}
+	if (ret)
+		handle_perf_event(tr, trace_ctx);
+
 	return ret;
 }
@@ -324,6 +425,8 @@ void __trace_graph_return(struct trace_array *tr,
 	struct trace_buffer *buffer = tr->array_buffer.buffer;
 	struct ftrace_graph_ret_entry *entry;
+	handle_perf_event(tr, trace_ctx);
+
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
 					  sizeof(*entry), trace_ctx);
 	if (!event)
@@ -465,6 +568,8 @@ static int graph_trace_init(struct trace_array *tr)
 	if (!tracer_flags_is_set(tr, TRACE_GRAPH_SLEEP_TIME))
 		fgraph_no_sleep_time++;
+	fgraph_perf_init(tr);
+
 	/* Make gops functions visible before we start tracing */
 	smp_mb();
@@ -476,8 +581,6 @@ static int graph_trace_init(struct trace_array *tr)
 	return 0;
 }
-static struct tracer graph_trace;
-
 static int ftrace_graph_trace_args(struct trace_array *tr, int set)
 {
 	trace_func_graph_ent_t entry;
@@ -512,6 +615,7 @@ static void graph_trace_reset(struct trace_array *tr)
 	if (WARN_ON_ONCE(fgraph_no_sleep_time < 0))
 		fgraph_no_sleep_time = 0;
+	fgraph_perf_reset(tr);
 	tracing_stop_cmdline_record();
 	unregister_ftrace_graph(tr->gops);
 }
@@ -1684,9 +1788,12 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 		ftrace_graph_skip_irqs = 0;
 		break;
-	case TRACE_GRAPH_ARGS:
-		return ftrace_graph_trace_args(tr, set);
-	}
+#ifdef CONFIG_PERF_EVENTS
+	case TRACE_GRAPH_PERF_CACHE:
+	case TRACE_GRAPH_PERF_CYCLES:
+		return ftrace_graph_perf_event(tr, set, bit);
+#endif
+	};
 	return 0;
 }
--
2.51.0