>From c538e33c6705c9284264a5035185edb55ace1786 Mon Sep 17 00:00:00 2001
Message-Id:
From: Mao Han
Date: Fri, 8 Mar 2019 13:21:33 +0800
Subject: [PATCH 1/3] csky: Add perf callchain support

This patch adds support for perf callchain sampling on csky platform.
Both fp and dwarf unwinding are supported with this patch. When fp is
used to unwind the stack, the program being sampled and the C library
need to be compiled with -mbacktrace for user callchains; kernel
callchains require CONFIG_STACKTRACE = y. Unwinding with dwarf requires
compilation with -fexceptions, otherwise there will be no debug
information inside the executable file.

Signed-off-by: Mao Han
---
 arch/csky/Kconfig                      |   2 +
 arch/csky/include/uapi/asm/perf_regs.h |  48 ++++++++++++
 arch/csky/kernel/Makefile              |   2 +
 arch/csky/kernel/perf_callchain.c      | 133 +++++++++++++++++++++++++++++++++
 arch/csky/kernel/perf_regs.c           |  41 ++++++++++
 5 files changed, 226 insertions(+)
 create mode 100644 arch/csky/include/uapi/asm/perf_regs.h
 create mode 100644 arch/csky/kernel/perf_callchain.c
 create mode 100644 arch/csky/kernel/perf_regs.c

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 398113c..93b535d 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -35,6 +35,8 @@ config CSKY
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000..337d8fa
--- /dev/null
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef _ASM_CSKY_PERF_REGS_H
+#define _ASM_CSKY_PERF_REGS_H
+/* Values are used by perf_reg_value() as word indices into struct pt_regs. */
+enum perf_event_csky_regs {
+	PERF_REG_CSKY_TLS,
+	PERF_REG_CSKY_LR,
+	PERF_REG_CSKY_PC,
+	PERF_REG_CSKY_SR,
+	PERF_REG_CSKY_SP,
+	PERF_REG_CSKY_ORIG_A0,
+	PERF_REG_CSKY_R0,
+	PERF_REG_CSKY_R1,
+	PERF_REG_CSKY_R2,
+	PERF_REG_CSKY_R3,
+	PERF_REG_CSKY_R4,
+	PERF_REG_CSKY_R5,
+	PERF_REG_CSKY_R6,
+	PERF_REG_CSKY_R7,
+	PERF_REG_CSKY_R8,
+	PERF_REG_CSKY_R9,
+	PERF_REG_CSKY_R10,
+	PERF_REG_CSKY_R11,
+	PERF_REG_CSKY_R12,
+	PERF_REG_CSKY_R13,
+	PERF_REG_CSKY_R16,	/* numbering jumps from R13: no R14/R15 entries */
+	PERF_REG_CSKY_R17,
+	PERF_REG_CSKY_R18,
+	PERF_REG_CSKY_R19,
+	PERF_REG_CSKY_R20,
+	PERF_REG_CSKY_R21,
+	PERF_REG_CSKY_R22,
+	PERF_REG_CSKY_R23,
+	PERF_REG_CSKY_R24,
+	PERF_REG_CSKY_R25,
+	PERF_REG_CSKY_R26,
+	PERF_REG_CSKY_R27,
+	PERF_REG_CSKY_R28,
+	PERF_REG_CSKY_R29,
+	PERF_REG_CSKY_R30,
+	PERF_REG_CSKY_HI,
+	PERF_REG_CSKY_LO,
+	PERF_REG_CSKY_DCSR,
+	PERF_REG_CSKY_MAX,	/* number of entries above; not itself a register */
+};
+#endif /* _ASM_CSKY_PERF_REGS_H */
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 484e6d3..3549d0d 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -9,6 +9,8 @@ obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_CSKY_PMU_V1) += perf_event.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c
new file mode 100644
index 0000000..0ed8279
--- /dev/null
+++ b/arch/csky/kernel/perf_callchain.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include
+#include
+
+/*
+ * Kernel callchain: a frame record, i.e. the caller's fp and the saved return
+ * address (lr), as read from the address a frame pointer holds.
+ */
+struct stackframe {
+	unsigned long fp;
+	unsigned long lr;
+};
+
+/*
+ * Step @frame to its caller by loading the record that frame->fp points at.
+ * Returns 0 on success, or -EPERM when fp is at the end of the kernel stack,
+ * is not 4-byte aligned (low two bits set), or is below TASK_SIZE.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+	int graph = 0;
+
+	if (kstack_end((void *)frame->fp) || (frame->fp & 0x3) ||
+	    frame->fp < TASK_SIZE)
+		return -EPERM;
+
+	frame->lr = ((struct stackframe *)frame->fp)->lr;
+	frame->fp = ((struct stackframe *)frame->fp)->fp;
+	/*
+	 * Look up the original return address in case the function graph
+	 * tracer replaced it; needs CONFIG_FUNCTION_GRAPH_TRACER turned on.
+	 */
+	if (__kernel_text_address(frame->lr))
+		frame->lr = ftrace_graph_ret_addr(NULL, &graph, frame->lr,
+						  NULL);
+	return 0;
+}
+
+/*
+ * Record fr->lr, then keep unwinding and recording until a frame cannot be
+ * unwound or the callchain buffer reports that it has no room left.
+ */
+static void notrace walk_stackframe(struct stackframe *fr,
+				    struct perf_callchain_entry_ctx *entry)
+{
+	do {
+		/* A non-zero return means the buffer is full: stop walking */
+		if (perf_callchain_store(entry, fr->lr))
+			break;
+	} while (unwind_frame_kernel(fr) >= 0);
+}
+
+/*
+ * Read one frame record from user memory at @fp and store a return address:
+ * @reg_lr when it is non-zero, otherwise the lr saved in that record.
+ * Returns the record's fp, or 0 without storing anything if the read fails.
+ */
+static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+				    unsigned long fp, unsigned long reg_lr)
+{
+	struct stackframe buftail;
+	unsigned long *user_frame_tail = (unsigned long *)fp;
+
+	/* Check that one whole struct stackframe is accessible at fp */
+	if (!access_ok(user_frame_tail, sizeof(buftail)))
+		return 0;
+	if (__copy_from_user_inatomic(&buftail, user_frame_tail,
+				      sizeof(buftail)))
+		return 0;
+
+	perf_callchain_store(entry, reg_lr ? reg_lr : buftail.lr);
+	return buftail.fp;
+}
+
+/*
+ * This will be called when the target is in user mode.
+ * This function will only be called when we use
+ * "PERF_SAMPLE_CALLCHAIN" in
+ * kernel/events/core.c:perf_prepare_sample()
+ *
+ * How to trigger perf_callchain_[user/kernel] :
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ *
+ * On C-SKY platform, the program being sampled and the C library
+ * need to be compiled with -mbacktrace, otherwise the user
+ * stack will not contain function frame.
+ */
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+			 struct pt_regs *regs)
+{
+	unsigned long fp;
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return;
+
+	fp = regs->regs[4];
+	perf_callchain_store(entry, regs->pc);
+	/*
+	 * When backtracing from a leaf function, lr is normally
+	 * not saved inside the frame on C-SKY, so get lr from pt_regs
+	 * at the sample point. However, the lr value can be incorrect if
+	 * lr is used as a temp register.
+	 */
+	fp = user_backtrace(entry, fp, regs->lr);
+
+	/* Follow saved fps until one is 0 or misaligned, or the buffer fills */
+	while (entry->nr < entry->max_stack && fp && !(fp & 0x3))
+		fp = user_backtrace(entry, fp, 0);
+}
+
+/*
+ * Record the kernel callchain of a sample, starting from the frame pointer
+ * (regs->regs[4]) and lr held in @regs.
+ */
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		pr_warn("C-SKY does not support perf in guest mode!\n");
+		return;
+	}
+
+	fr.fp = regs->regs[4];
+	fr.lr = regs->lr;
+	walk_stackframe(&fr, entry);
+}
diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c
new file mode 100644
index 0000000..55fa389
--- /dev/null
+++ b/arch/csky/kernel/perf_regs.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Return register @idx of @regs; warn once and return 0 if out of range. */
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_CSKY_MAX))
+		return 0;
+	/* Unsigned read: a set top bit must not be sign-extended into the u64 */
+	return ((unsigned long *)regs)[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_CSKY_MAX) - 1))
+
+/* Reject empty masks and masks with bits at or above PERF_REG_CSKY_MAX. */
+int perf_reg_validate(u64 mask)
+{
+	return (!mask || (mask & REG_RESERVED)) ? -EINVAL : 0;
+}
+
+/* Always reports PERF_SAMPLE_REGS_ABI_32, whatever @task is. */
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+
+/* Fill @regs_user from current; @regs and @regs_user_copy are not used. */
+void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
-- 
2.7.4