[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220506201627.85598-5-namhyung@kernel.org>
Date: Fri, 6 May 2022 13:16:27 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Arnaldo Carvalho de Melo <acme@...nel.org>,
Jiri Olsa <jolsa@...nel.org>
Cc: Ingo Molnar <mingo@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
LKML <linux-kernel@...r.kernel.org>,
Andi Kleen <ak@...ux.intel.com>,
Ian Rogers <irogers@...gle.com>,
Song Liu <songliubraving@...com>, Hao Luo <haoluo@...gle.com>,
Milian Wolff <milian.wolff@...b.com>, bpf@...r.kernel.org,
linux-perf-users@...r.kernel.org,
Blake Jones <blakejones@...gle.com>
Subject: [PATCH 4/4] perf record: Handle argument change in sched_switch
Recently the sched_switch tracepoint gained a new prev_state argument,
and it is hard to handle that change from within a BPF program. Instead,
we can check the function prototype in BTF before loading the program.
Thus the tracepoint handler reads a read-only flag (has_prev_state) that is
set from the BTF info before load, and picks the matching argument layout.
Signed-off-by: Namhyung Kim <namhyung@...nel.org>
---
tools/perf/util/bpf_off_cpu.c | 28 +++++++++++++++
tools/perf/util/bpf_skel/off_cpu.bpf.c | 48 ++++++++++++++++++++------
2 files changed, 65 insertions(+), 11 deletions(-)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 89f36229041d..31343db68ed3 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -86,6 +86,33 @@ static void off_cpu_finish(void *arg __maybe_unused)
off_cpu_bpf__destroy(skel);
}
+/*
+ * Recent kernels added a prev_state argument to the sched_switch tracepoint,
+ * so the BPF program has to be told which argument layout to read.
+ *
+ * Looks up the "bpf_trace_sched_switch" typedef in the BTF returned by
+ * bpf_object__btf(skel->obj) and follows typedef -> pointer -> func proto.
+ * When that proto has exactly 4 parameters, skel->rodata->has_prev_state is
+ * set to true. Any failed lookup returns early and leaves the flag untouched.
+ * Called from off_cpu_prepare() right before off_cpu_bpf__load().
+ *
+ * NOTE(review): kernel-generated tracepoint typedefs are normally spelled
+ * "btf_trace_<event>" and their func proto carries a leading context
+ * argument -- confirm the typedef name and the expected parameter count.
+ * NOTE(review): bpf_object__btf() presumably yields the BPF object's own BTF
+ * rather than the running kernel's -- confirm the typedef is present there.
+ */
+static void check_sched_switch_args(void)
+{
+ const struct btf *btf = bpf_object__btf(skel->obj);
+ const struct btf_type *t1, *t2, *t3;
+ u32 type_id;
+
+ type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+ BTF_KIND_TYPEDEF);
+ if ((s32)type_id < 0)
+ return;
+
+ t1 = btf__type_by_id(btf, type_id);
+ if (t1 == NULL)
+ return;
+
+ t2 = btf__type_by_id(btf, t1->type);
+ if (t2 == NULL || !btf_is_ptr(t2))
+ return;
+
+ t3 = btf__type_by_id(btf, t2->type);
+ if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+ /* new format: pass prev_state as 2nd arg */
+ skel->rodata->has_prev_state = true;
+ }
+}
+
int off_cpu_prepare(struct evlist *evlist, struct target *target)
{
int err, fd, i;
@@ -114,6 +141,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target)
}
set_max_rlimit();
+ check_sched_switch_args();
err = off_cpu_bpf__load(skel);
if (err) {
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index c35106b9e20b..98eaba95924f 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -72,6 +72,8 @@ int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+const volatile bool has_prev_state = false; /* set to true by check_sched_switch_args() before the skeleton is loaded */
+
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state)
return 1;
}
-SEC("tp_btf/sched_switch")
-int on_switch(u64 *ctx)
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+ struct task_struct *next, int state)
{
__u64 ts;
- int state;
__u32 stack_id;
- struct task_struct *prev, *next;
struct tstamp_data *pelem;
- if (!enabled)
- return 0;
-
- prev = (struct task_struct *)ctx[1];
- next = (struct task_struct *)ctx[2];
- state = get_task_state(prev);
-
ts = bpf_ktime_get_ns();
if (!can_record(prev, state))
@@ -180,4 +173,37 @@ int on_switch(u64 *ctx)
return 0;
}
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+ struct task_struct *prev, *next;
+ int prev_state;
+
+ if (!enabled) /* nothing is recorded while 'enabled' is zero */
+ return 0;
+
+ /*
+ * Pull the previous/next tasks and the previous task's state out of
+ * the raw tracepoint arguments in ctx[], then hand them over to
+ * off_cpu_stat(). Which ctx[] slots are read depends on
+ * has_prev_state: when it is set, the state comes straight from
+ * ctx[1]; otherwise it is obtained via get_task_state(prev).
+ *
+ * For v5.18+:
+ * TP_PROTO(bool preempt, int prev_state,
+ * struct task_struct *prev,
+ * struct task_struct *next)
+ *
+ * On older kernels:
+ * TP_PROTO(bool preempt, struct task_struct *prev,
+ * struct task_struct *next)
+ */
+ if (has_prev_state) {
+ prev = (struct task_struct *)ctx[2];
+ next = (struct task_struct *)ctx[3];
+ prev_state = (int)ctx[1];
+ } else {
+ prev = (struct task_struct *)ctx[1];
+ next = (struct task_struct *)ctx[2];
+
+ prev_state = get_task_state(prev);
+ }
+
+ return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
--
2.36.0.512.ge40c2bad7a-goog
Powered by blists - more mailing lists