[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzZCLwzrZPTOBEg88i1Tki6uPL73ujSE-SCSSU16HENUHA@mail.gmail.com>
Date: Tue, 29 Mar 2022 17:00:16 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Song Liu <song@...nel.org>
Cc: bpf <bpf@...r.kernel.org>, Networking <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Kernel Team <kernel-team@...com>
Subject: Re: [PATCH bpf] tools/runqslower: fix handle__sched_switch for
updated tp sched_switch
On Tue, Mar 29, 2022 at 4:19 PM Song Liu <song@...nel.org> wrote:
>
> TP_PROTO of sched_switch is updated with a new arg prev_state, which
> causes runqslower load failure:
>
> libbpf: prog 'handle__sched_switch': BPF program load failed: Permission denied
> libbpf: prog 'handle__sched_switch': -- BEGIN PROG LOAD LOG --
> R1 type=ctx expected=fp
> 0: R1=ctx(off=0,imm=0) R10=fp0
> ; int handle__sched_switch(u64 *ctx)
> 0: (bf) r7 = r1 ; R1=ctx(off=0,imm=0) R7_w=ctx(off=0,imm=0)
> ; struct task_struct *next = (struct task_struct *)ctx[2];
> 1: (79) r6 = *(u64 *)(r7 +16)
> func 'sched_switch' arg2 has btf_id 186 type STRUCT 'task_struct'
> 2: R6_w=ptr_task_struct(off=0,imm=0) R7_w=ctx(off=0,imm=0)
> ; struct task_struct *prev = (struct task_struct *)ctx[1];
> 2: (79) r2 = *(u64 *)(r7 +8) ; R2_w=scalar() R7_w=ctx(off=0,imm=0)
> 3: (b7) r1 = 0 ; R1_w=0
> ; struct runq_event event = {};
> 4: (7b) *(u64 *)(r10 -8) = r1 ; R1_w=P0 R10=fp0 fp-8_w=00000000
> 5: (7b) *(u64 *)(r10 -16) = r1 ; R1_w=P0 R10=fp0 fp-16_w=00000000
> 6: (7b) *(u64 *)(r10 -24) = r1 ; R1_w=P0 R10=fp0 fp-24_w=00000000
> 7: (7b) *(u64 *)(r10 -32) = r1 ; R1_w=P0 R10=fp0 fp-32_w=00000000
> ; if (prev->__state == TASK_RUNNING)
> 8: (61) r1 = *(u32 *)(r2 +24)
> R2 invalid mem access 'scalar'
> processed 9 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
> -- END PROG LOAD LOG --
> libbpf: failed to load program 'handle__sched_switch'
> libbpf: failed to load object 'runqslower_bpf'
> libbpf: failed to load BPF skeleton 'runqslower_bpf': -13
> failed to load BPF object: -13
>
> Update runqslower to fix this issue. Also, as we are on this, use BPF_PROG
> in runqslower for cleaner code.
>
> Fixes: fa2c3254d7cf ("sched/tracing: Don't re-read p->state when emitting sched_switch event")
> Signed-off-by: Song Liu <song@...nel.org>
> ---
> tools/bpf/runqslower/runqslower.bpf.c | 19 +++++--------------
> 1 file changed, 5 insertions(+), 14 deletions(-)
>
It would be much less disruptive if that prev_state was added after
"next", but oh well...
But anyways, let's handle this in a way that can handle both old
kernels and new ones and do the same change in libbpf-tool's
runqslower ([0]). Can you please follow up there as well?
We can use BPF CO-RE to detect which order of arguments running kernel
has by checking prev_state field existence in struct
trace_event_raw_sched_switch. Can you please try that? Use
bpf_core_field_exists() for that.
[0] https://github.com/iovisor/bcc/blob/master/libbpf-tools/runqslower.bpf.c
> diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
> index 9a5c1f008fe6..30e491d8308f 100644
> --- a/tools/bpf/runqslower/runqslower.bpf.c
> +++ b/tools/bpf/runqslower/runqslower.bpf.c
> @@ -2,6 +2,7 @@
> // Copyright (c) 2019 Facebook
> #include "vmlinux.h"
> #include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> #include "runqslower.h"
>
> #define TASK_RUNNING 0
> @@ -43,31 +44,21 @@ static int trace_enqueue(struct task_struct *t)
> }
>
> SEC("tp_btf/sched_wakeup")
> -int handle__sched_wakeup(u64 *ctx)
> +int BPF_PROG(handle__sched_wakeup, struct task_struct *p)
> {
> - /* TP_PROTO(struct task_struct *p) */
> - struct task_struct *p = (void *)ctx[0];
> -
> return trace_enqueue(p);
> }
>
> SEC("tp_btf/sched_wakeup_new")
> -int handle__sched_wakeup_new(u64 *ctx)
> +int BPF_PROG(handle__sched_wakeup_new, struct task_struct *p)
> {
> - /* TP_PROTO(struct task_struct *p) */
> - struct task_struct *p = (void *)ctx[0];
> -
> return trace_enqueue(p);
> }
>
> SEC("tp_btf/sched_switch")
> -int handle__sched_switch(u64 *ctx)
> +int BPF_PROG(handle__sched_switch, bool preempt, unsigned long prev_state,
> + struct task_struct *prev, struct task_struct *next)
> {
> - /* TP_PROTO(bool preempt, struct task_struct *prev,
> - * struct task_struct *next)
> - */
> - struct task_struct *prev = (struct task_struct *)ctx[1];
> - struct task_struct *next = (struct task_struct *)ctx[2];
> struct runq_event event = {};
> u64 *tsp, delta_us;
> long state;
> --
> 2.30.2
>
Powered by blists - more mailing lists