[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210304221947.5a177ce2e1e94314e57c38a4@kernel.org>
Date: Thu, 4 Mar 2021 22:19:47 +0900
From: Masami Hiramatsu <mhiramat@...nel.org>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: "Daniel Xu" <dxu@...uu.xyz>, linux-kernel@...r.kernel.org,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>, kuba@...nel.org
Subject: Re: Broken kretprobe stack traces
On Wed, 3 Mar 2021 09:26:04 -0500
Steven Rostedt <rostedt@...dmis.org> wrote:
> On Wed, 3 Mar 2021 13:48:28 +0900
> Masami Hiramatsu <mhiramat@...nel.org> wrote:
>
> >
> > >
> > > I think (can't prove) this used to work:
>
> Would be good to find out if it did.
>
> >
> > I'm not sure the bpftrace had correctly handled it or not.
> >
> > >
> > > # bpftrace -e 'kretprobe:__tcp_retransmit_skb { @[kstack()] = count() }'
> > > Attaching 1 probe...
> > > ^C
> > >
> > > @[
> > > kretprobe_trampoline+0
> > > ]: 1
> >
> > Would you know how the bpftrace stacktracer rewinds the stack entries?
> > FYI, ftrace does it in trace_seq_print_sym()@kernel/trace/trace_output.c
> >
>
> The difference between trace events and normal function tracing stack
> traces is that it keeps its original return address. But kretprobes (and
> function graph tracing, and some bpf trampolines too) modify the return
> pointer, and that could possibly cause havoc with the stack trace.
BTW, I think if the stack tracer passes the nth of kretprobe_trampoline
or a cursor, kretprobe can find the correct return address from given task.
I've made a patch to do that only for the CONFIG_ARCH_STACKWALK=y
Here is an example on x86.
# echo r vfs_read > kprobe_events
# echo stacktrace > events/kprobes/r_vfs_read_0/trigger
# echo 1 > events/kprobes/r_vfs_read_0/enable
# echo 1 > options/sym-offset
# less trace
...
sh-132 [007] ...1 22.524917: <stack trace>
=> kretprobe_dispatcher+0x7d/0xc0
=> __kretprobe_trampoline_handler+0xdb/0x1b0
=> trampoline_handler+0x48/0x60
=> kretprobe_trampoline+0x2a/0x50
=> ksys_read+0x70/0xf0
=> __x64_sys_read+0x1a/0x20
=> do_syscall_64+0x38/0x50
=> entry_SYSCALL_64_after_hwframe+0x44/0xae
=> 0
=> 0
------
>From 77a785a3a0791171b570830d0b2f099f8a4ba337 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@...nel.org>
Date: Thu, 4 Mar 2021 14:19:24 +0900
Subject: [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe
Recover the return address on the stack which changed by the
kretprobe.
Signed-off-by: Masami Hiramatsu <mhiramat@...nel.org>
---
include/linux/kprobes.h | 3 ++
kernel/kprobes.c | 81 +++++++++++++++++++++++++++--------------
kernel/stacktrace.c | 26 +++++++++++++
3 files changed, 82 insertions(+), 28 deletions(-)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..a022e507d829 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,6 +205,9 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs);
extern int arch_trampoline_kprobe(struct kprobe *p);
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+ unsigned long addr,
+ struct llist_node **cur);
/* If the trampoline handler called from a kprobe, use this version */
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
void *trampoline_address,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..b3d9dbd6086f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1863,46 +1863,56 @@ unsigned long __weak arch_deref_entry_point(void *entry)
#ifdef CONFIG_KRETPROBES
-unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
- void *trampoline_address,
- void *frame_pointer)
+/* This assumes the tsk is current or the task which is not running. */
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+ unsigned long addr,
+ struct llist_node **cur)
{
- kprobe_opcode_t *correct_ret_addr = NULL;
struct kretprobe_instance *ri = NULL;
- struct llist_node *first, *node;
- struct kretprobe *rp;
+ struct llist_node *node = *cur;
- /* Find all nodes for this frame. */
- first = node = current->kretprobe_instances.first;
- while (node) {
- ri = container_of(node, struct kretprobe_instance, llist);
+ if (addr != (unsigned long)&kretprobe_trampoline)
+ return addr;
- BUG_ON(ri->fp != frame_pointer);
+ if (!node)
+ node = tsk->kretprobe_instances.first;
+ else
+ node = node->next;
- if (ri->ret_addr != trampoline_address) {
- correct_ret_addr = ri->ret_addr;
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- goto found;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
+ if (ri->ret_addr != (void *)&kretprobe_trampoline) {
+ *cur = node;
+ return (unsigned long)ri->ret_addr;
}
-
node = node->next;
}
- pr_err("Oops! Kretprobe fails to find correct return address.\n");
- BUG_ON(1);
+ return 0;
+}
-found:
- /* Unlink all nodes for this frame. */
- current->kretprobe_instances.first = node->next;
- node->next = NULL;
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+ void *trampoline_address,
+ void *frame_pointer)
+{
+ kprobe_opcode_t *correct_ret_addr = NULL;
+ struct kretprobe_instance *ri = NULL;
+ struct llist_node *first, *node = NULL;
+ struct kretprobe *rp;
- /* Run them.. */
+ /* Find correct address and all nodes for this frame. */
+ correct_ret_addr = (void*)kretprobe_real_stack_tsk(current,
+ (unsigned long)&kretprobe_trampoline, &node);
+ if (!correct_ret_addr) {
+ pr_err("Oops! Kretprobe fails to find correct return address.\n");
+ BUG_ON(1);
+ }
+
+ /* Run them. */
+ first = current->kretprobe_instances.first;
while (first) {
ri = container_of(first, struct kretprobe_instance, llist);
- first = first->next;
+
+ BUG_ON(ri->fp != frame_pointer);
rp = get_kretprobe(ri);
if (rp && rp->handler) {
@@ -1913,6 +1923,21 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
rp->handler(ri, regs);
__this_cpu_write(current_kprobe, prev);
}
+ if (first == node)
+ break;
+
+ first = first->next;
+ }
+
+ /* Unlink all nodes for this frame. */
+ first = current->kretprobe_instances.first;
+ current->kretprobe_instances.first = node->next;
+ node->next = NULL;
+
+ /* Recycle them. */
+ while (first) {
+ ri = container_of(first, struct kretprobe_instance, llist);
+ first = first->next;
recycle_rp_inst(ri);
}
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..416f357e64b8 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
+#include <linux/kprobes.h>
/**
* stack_trace_print - Print the entries in the stack trace
@@ -76,6 +77,10 @@ struct stacktrace_cookie {
unsigned int size;
unsigned int skip;
unsigned int len;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *cur;
+ struct task_struct *tsk;
+#endif
};
static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
@@ -89,6 +94,7 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
c->skip--;
return true;
}
+ addr = kretprobe_real_stack_tsk(c->tsk, addr, &c->cur);
c->store[c->len++] = addr;
return c->len < c->size;
}
@@ -116,6 +122,10 @@ unsigned int stack_trace_save(unsigned long *store, unsigned int size,
.store = store,
.size = size,
.skip = skipnr + 1,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
arch_stack_walk(consume_entry, &c, current, NULL);
@@ -141,6 +151,10 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
.size = size,
/* skip this function if they are tracing us */
.skip = skipnr + (current == tsk),
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = tsk,
+#endif
};
if (!try_get_task_stack(tsk))
@@ -168,6 +182,10 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
.store = store,
.size = size,
.skip = skipnr,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
arch_stack_walk(consume_entry, &c, current, regs);
@@ -194,6 +212,10 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
struct stacktrace_cookie c = {
.store = store,
.size = size,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = tsk,
+#endif
};
int ret;
@@ -224,6 +246,10 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
struct stacktrace_cookie c = {
.store = store,
.size = size,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
mm_segment_t fs;
--
2.25.1
Thank you,
--
Masami Hiramatsu <mhiramat@...nel.org>
Powered by blists - more mailing lists