Message-ID: <20250113195449.72ab5d81@gandalf.local.home>
Date: Mon, 13 Jan 2025 19:54:49 -0500
From: Steven Rostedt <rostedt@...dmis.org>
To: "Masami Hiramatsu (Google)" <mhiramat@...nel.org>
Cc: Jiri Olsa <olsajiri@...il.com>, Alexei Starovoitov
<alexei.starovoitov@...il.com>, Florent Revest <revest@...omium.org>,
linux-trace-kernel@...r.kernel.org, LKML <linux-kernel@...r.kernel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>, bpf <bpf@...r.kernel.org>, Alexei
Starovoitov <ast@...nel.org>, Alan Maguire <alan.maguire@...cle.com>, Mark
Rutland <mark.rutland@....com>
Subject: Re: [PATCH] fgraph: Move trace_clock_local() for return time to
function_graph tracer
On Sun, 12 Jan 2025 14:26:35 +0900
"Masami Hiramatsu (Google)" <mhiramat@...nel.org> wrote:
> From: Masami Hiramatsu (Google) <mhiramat@...nel.org>
>
> Since ftrace_graph_ret::rettime is only referred to in the function_graph
> tracer, the trace_clock_local() call in fgraph is just overhead for
> other fgraph users.
>
> Move the trace_clock_local() call for recording the return time into the
> function_graph tracer, and just zero the rettime field on the fgraph side.
> The rettime field is then filled in by one function_graph tracer instance
> and the cached value is reused by the other function_graph tracer instances.
>
> According to Jiri's report[1], removing this call improves fprobe
> performance by ~27%.
>
> [1] https://lore.kernel.org/all/Z3aSuql3fnXMVMoM@krava/
>
> Reported-by: Jiri Olsa <olsajiri@...il.com>
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@...nel.org>
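IIUC, the idea above is that rettime stays in struct ftrace_graph_ret but is
zeroed by the fgraph core and filled in lazily by the first function_graph
tracer instance that handles the exit. Rough sketch of that idea (my own
sketch, not the quoted patch's actual code):

	/* Rough sketch of the quoted proposal, not the patch's actual code */
	static void trace_graph_return(struct ftrace_graph_ret *trace,
				       struct fgraph_ops *gops)
	{
		/*
		 * The fgraph core zeroed rettime; the first tracer instance
		 * fills it in, later instances on the same exit reuse the
		 * cached value.
		 */
		if (!trace->rettime)
			trace->rettime = trace_clock_local();

		/* ... record the exit event as before ... */
	}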
I'd rather just nuke the calltime and rettime from struct ftrace_graph_ret.
How about something like this instead?
[ based on: https://lore.kernel.org/linux-trace-kernel/20250113183124.61767419@gandalf.local.home/ ]
I can start testing it (it compiles and boots).
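The net effect is that each fgraph user takes its own return timestamp and
passes both timestamps to __trace_graph_return() explicitly, instead of
reading them out of struct ftrace_graph_ret. Roughly like this (sketch only,
the callback name is made up; irqsoff and wakeup below do essentially this):

	/* Sketch of a return callback using the reworked __trace_graph_return() */
	static void example_graph_return(struct ftrace_graph_ret *trace,
					 struct fgraph_ops *gops)
	{
		struct trace_array *tr = gops->private;
		u64 rettime = trace_clock_local();	/* taken by the tracer, not by fgraph */
		u64 *calltime;
		int size;

		/* The entry time was stashed on the shadow stack by the entry handler */
		calltime = fgraph_retrieve_data(gops->idx, &size);
		if (!calltime)
			return;

		__trace_graph_return(tr, trace, tracing_gen_ctx(), *calltime, rettime);
	}
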
include/linux/ftrace.h | 2 --
kernel/trace/fgraph.c | 1 -
kernel/trace/trace.h | 4 +++-
kernel/trace/trace_entries.h | 8 +++----
kernel/trace/trace_functions_graph.c | 33 ++++++++++++++++------------
kernel/trace/trace_irqsoff.c | 5 +++--
kernel/trace/trace_sched_wakeup.c | 6 +++--
7 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aa9ddd1e4bb6..848ddf0d8f89 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1061,8 +1061,6 @@ struct ftrace_graph_ret {
int depth;
/* Number of functions that overran the depth limit for current task */
unsigned int overrun;
- unsigned long long calltime;
- unsigned long long rettime;
} __packed;
struct fgraph_ops;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 30e3ddc8a8a8..a7e6c8488b7f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -818,7 +818,6 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
return (unsigned long)panic;
}
- trace.rettime = trace_clock_local();
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = fgraph_ret_regs_return_value(ret_regs);
#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9691b47b5f3d..467a8143fbb9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -911,7 +911,9 @@ extern int __trace_graph_retaddr_entry(struct trace_array *tr,
unsigned long retaddr);
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
- unsigned int trace_ctx);
+ unsigned int trace_ctx,
+ u64 calltime, u64 rettime);
+
extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern void free_fgraph_ops(struct trace_array *tr);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 82fd174ebbe0..fbfb396905a6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -124,8 +124,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, retval )
__field_packed( int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field_packed( unsigned long long, ret, calltime)
- __field_packed( unsigned long long, ret, rettime )
+ __field(unsigned long long, calltime )
+ __field(unsigned long long, rettime )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d retval: %lx",
@@ -146,8 +146,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, func )
__field_packed( int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field_packed( unsigned long long, ret, calltime)
- __field_packed( unsigned long long, ret, rettime )
+ __field(unsigned long long, calltime )
+ __field(unsigned long long, rettime )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 5504b5e4e7b4..7bff133c1543 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -270,12 +270,10 @@ __trace_graph_function(struct trace_array *tr,
struct ftrace_graph_ret ret = {
.func = ip,
.depth = 0,
- .calltime = time,
- .rettime = time,
};
__trace_graph_entry(tr, &ent, trace_ctx);
- __trace_graph_return(tr, &ret, trace_ctx);
+ __trace_graph_return(tr, &ret, trace_ctx, time, time);
}
void
@@ -287,8 +285,9 @@ trace_graph_function(struct trace_array *tr,
}
void __trace_graph_return(struct trace_array *tr,
- struct ftrace_graph_ret *trace,
- unsigned int trace_ctx)
+ struct ftrace_graph_ret *trace,
+ unsigned int trace_ctx,
+ u64 calltime, u64 rettime)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
@@ -300,6 +299,8 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
+ entry->calltime = calltime;
+ entry->rettime = rettime;
trace_buffer_unlock_commit_nostack(buffer, event);
}
@@ -322,10 +323,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_times *ftimes;
unsigned long flags;
unsigned int trace_ctx;
+ u64 calltime, rettime;
long disabled;
int size;
int cpu;
+ rettime = trace_clock_local();
+
ftrace_graph_addr_finish(gops, trace);
if (*task_var & TRACE_GRAPH_NOTRACE) {
@@ -339,7 +343,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
handle_nosleeptime(trace, ftimes, size);
- trace->calltime = ftimes->calltime;
+ calltime = ftimes->calltime;
local_irq_save(flags);
cpu = raw_smp_processor_id();
@@ -347,7 +351,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
- __trace_graph_return(tr, trace, trace_ctx);
+ __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
}
atomic_dec(&data->disabled);
local_irq_restore(flags);
@@ -372,10 +376,8 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
handle_nosleeptime(trace, ftimes, size);
- trace->calltime = ftimes->calltime;
-
if (tracing_thresh &&
- (trace->rettime - ftimes->calltime < tracing_thresh))
+ (trace_clock_local() - ftimes->calltime < tracing_thresh))
return;
else
trace_graph_return(trace, gops);
@@ -861,7 +863,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
- duration = graph_ret->rettime - graph_ret->calltime;
+ duration = ret_entry->rettime - ret_entry->calltime;
func = call->func + iter->tr->text_delta;
@@ -1142,11 +1144,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
}
static enum print_line_t
-print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
+print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
struct trace_entry *ent, struct trace_iterator *iter,
u32 flags)
{
- unsigned long long duration = trace->rettime - trace->calltime;
+ struct ftrace_graph_ret *trace = &retentry->ret;
+ u64 calltime = retentry->calltime;
+ u64 rettime = retentry->rettime;
+ unsigned long long duration = rettime - calltime;
struct fgraph_data *data = iter->private;
struct trace_array *tr = iter->tr;
unsigned long func;
@@ -1347,7 +1352,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
- return print_graph_return(&field->ret, s, entry, iter, flags);
+ return print_graph_return(field, s, entry, iter, flags);
}
case TRACE_STACK:
case TRACE_FN:
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a4e799c1e767..1d3646ce9c34 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -221,6 +221,7 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
unsigned long flags;
unsigned int trace_ctx;
u64 *calltime;
+ u64 rettime;
int size;
ftrace_graph_addr_finish(gops, trace);
@@ -228,13 +229,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
if (!func_prolog_dec(tr, &data, &flags))
return;
+ rettime = trace_clock_local();
calltime = fgraph_retrieve_data(gops->idx, &size);
if (!calltime)
return;
- trace->calltime = *calltime;
trace_ctx = tracing_gen_ctx_flags(flags);
- __trace_graph_return(tr, trace, trace_ctx);
+ __trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
atomic_dec(&data->disabled);
}
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index c58292e424d5..c4bbaae2a2a3 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -156,6 +156,7 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace,
struct trace_array_cpu *data;
unsigned int trace_ctx;
u64 *calltime;
+ u64 rettime;
int size;
ftrace_graph_addr_finish(gops, trace);
@@ -163,12 +164,13 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace,
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return;
+ rettime = trace_clock_local();
+
calltime = fgraph_retrieve_data(gops->idx, &size);
if (!calltime)
return;
- trace->calltime = *calltime;
- __trace_graph_return(tr, trace, trace_ctx);
+ __trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
atomic_dec(&data->disabled);
preempt_enable_notrace();
--
2.45.2
-- Steve