Message-ID: <CAAhV-H41XxvVhtkL3WhHH_iFVWu1E28dPNZAbNCOgtrzfrZmpQ@mail.gmail.com>
Date: Tue, 15 Nov 2022 18:54:53 +0800
From: Huacai Chen <chenhuacai@...nel.org>
To: Qing Zhang <zhangqing@...ngson.cn>
Cc: Steven Rostedt <rostedt@...dmis.org>,
Ingo Molnar <mingo@...hat.com>, loongarch@...ts.linux.dev,
linux-kernel@...r.kernel.org, Jinyang He <hejinyang@...ngson.cn>
Subject: Re: [PATCH v6 3/9] LoongArch/ftrace: Add dynamic function tracer support

Hi, Qing,

On Tue, Nov 15, 2022 at 10:55 AM Qing Zhang <zhangqing@...ngson.cn> wrote:
>
> The compiler inserts 2 NOPs before the regular function prologue. The
> T series registers are available and safe to clobber (LoongArch psABI).
>
> At runtime, the nop is replaced with bl to enable the ftrace call and
> the bl is replaced with nop to disable it. The bl clobbers RA, so the
> original RA value is saved in t0 first. The details are:
>
> | Compiled   | Disabled               | Enabled                |
> +------------+------------------------+------------------------+
> | nop        | move    t0, ra         | move    t0, ra         |
> | nop        | nop                    | bl      ftrace_caller  |
> | func_body  | func_body              | func_body              |
>
> The original RA is saved by ftrace_regs_entry and restored into RA
> before returning to the regular function prologue. When a function is
> not being traced, the "move t0, ra" is harmless.
>
> 1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace_dyn.c)
>    The two functions turn each recorded call site of filtered functions
>    into a call to ftrace_caller or back into nops.
>
> 2) ftrace_update_ftrace_func (in kernel/ftrace_dyn.c)
>    Turns the instruction at ftrace_call into a call to a generic entry
>    for function tracers.
>
> 3) ftrace_caller (in kernel/mcount-dyn.S)
>    The entry that each patched call site branches to once it is filtered
>    to be traced; a slot sketch follows below.
>
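For readers following the diff, a quick sketch of which slot each helper
patches (my reading of the patch; not part of it):

    /*
     * rec->ip + 0: nop --> "move t0, ra"       written once by
     *                                          ftrace_init_nop()
     * rec->ip + 4: nop <-> "bl ftrace_caller"  toggled by ftrace_make_call()
     *                                          / ftrace_make_nop()
     */
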
> Co-developed-by: Jinyang He <hejinyang@...ngson.cn>
> Signed-off-by: Jinyang He <hejinyang@...ngson.cn>
> Signed-off-by: Qing Zhang <zhangqing@...ngson.cn>
> ---
> arch/loongarch/Kconfig | 1 +
> arch/loongarch/include/asm/ftrace.h | 16 ++++
> arch/loongarch/include/asm/inst.h | 15 ++++
> arch/loongarch/include/asm/unwind.h | 2 +-
> arch/loongarch/kernel/Makefile | 5 ++
> arch/loongarch/kernel/ftrace_dyn.c | 111 ++++++++++++++++++++++++
> arch/loongarch/kernel/inst.c | 92 ++++++++++++++++++++
> arch/loongarch/kernel/mcount-dyn.S | 89 +++++++++++++++++++
> arch/loongarch/kernel/unwind_prologue.c | 35 ++++++--
> 9 files changed, 360 insertions(+), 6 deletions(-)
> create mode 100644 arch/loongarch/kernel/ftrace_dyn.c
> create mode 100644 arch/loongarch/kernel/mcount-dyn.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index e6b1defca1f1..615ce62422b8 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -89,6 +89,7 @@ config LOONGARCH
> select HAVE_C_RECORDMCOUNT
> select HAVE_DEBUG_STACKOVERFLOW
> select HAVE_DMA_CONTIGUOUS
> + select HAVE_DYNAMIC_FTRACE
> select HAVE_EBPF_JIT
> select HAVE_EXIT_THREAD
> select HAVE_FAST_GUP
> diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
> index 6a3e76234618..76ca58767f4d 100644
> --- a/arch/loongarch/include/asm/ftrace.h
> +++ b/arch/loongarch/include/asm/ftrace.h
> @@ -10,9 +10,25 @@
> #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
>
> #ifndef __ASSEMBLY__
> +#ifndef CONFIG_DYNAMIC_FTRACE
> extern void _mcount(void);
> #define mcount _mcount
> +#endif
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +static inline unsigned long ftrace_call_adjust(unsigned long addr)
> +{
> + return addr;
> +}
> +
> +struct dyn_arch_ftrace {
> +};
> +
> +struct dyn_ftrace;
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> +#define ftrace_init_nop ftrace_init_nop
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE */
> #endif /* __ASSEMBLY__ */
> #endif /* CONFIG_FUNCTION_TRACER */
> #endif /* _ASM_LOONGARCH_FTRACE_H */
> diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> index a52913787183..0ec775d39ca5 100644
> --- a/arch/loongarch/include/asm/inst.h
> +++ b/arch/loongarch/include/asm/inst.h
> @@ -11,6 +11,9 @@
> #define INSN_NOP 0x03400000
> #define INSN_BREAK 0x002a0000
>
> +#define INSN_NOP 0x03400000
> +#define INSN_BREAK 0x002a0000
They are already defined.
Huacai
> +
> #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000
> #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000
> #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000
> @@ -349,6 +352,18 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
> is_imm12_negative(ip->reg2i12_format.immediate);
> }
>
> +int larch_insn_read(void *addr, u32 *insnp);
> +int larch_insn_write(void *addr, u32 insn);
> +int larch_insn_patch_text(void *addr, u32 insn);
> +
> +u32 larch_insn_gen_nop(void);
> +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
> +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
> +
> +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj,
> + enum loongarch_gpr rk);
> +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
> +
> u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
> u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
> u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
> diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
> index 6af4718bdf01..a51eec00efb8 100644
> --- a/arch/loongarch/include/asm/unwind.h
> +++ b/arch/loongarch/include/asm/unwind.h
> @@ -20,7 +20,7 @@ struct unwind_state {
> char type; /* UNWINDER_XXX */
> struct stack_info stack_info;
> struct task_struct *task;
> - bool first, error;
> + bool first, error, is_ftrace;
> unsigned long sp, pc, ra;
> };
>
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 3f71bce1c7ce..c5e2bfd8247d 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -16,8 +16,13 @@ obj-$(CONFIG_EFI) += efi.o
> obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>
> ifdef CONFIG_FUNCTION_TRACER
> +ifndef CONFIG_DYNAMIC_FTRACE
> obj-y += mcount.o ftrace.o
> CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
> +else
> +obj-y += mcount-dyn.o ftrace_dyn.o
> +CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE)
> +endif
> CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
> CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
> CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
> diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
> new file mode 100644
> index 000000000000..1f8955be8b64
> --- /dev/null
> +++ b/arch/loongarch/kernel/ftrace_dyn.c
> @@ -0,0 +1,111 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Based on arch/arm64/kernel/ftrace.c
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/ftrace.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/inst.h>
> +
> +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
> + bool validate)
> +{
> + u32 replaced;
> +
> + if (validate) {
> + if (larch_insn_read((void *)pc, &replaced))
> + return -EFAULT;
> +
> + if (replaced != old)
> + return -EINVAL;
> + }
> +
> + if (larch_insn_patch_text((void *)pc, new))
> + return -EPERM;
> +
> + return 0;
> +}
> +
> +int ftrace_update_ftrace_func(ftrace_func_t func)
> +{
> + unsigned long pc;
> + u32 new;
> +
> + pc = (unsigned long)&ftrace_call;
> + new = larch_insn_gen_bl(pc, (unsigned long)func);
> +
> + return ftrace_modify_code(pc, 0, new, false);
> +}
> +
> +/*
> + * The compiler inserts 2 NOPs before the regular function prologue. The
> + * T series registers are available and safe to clobber (LoongArch psABI).
> + *
> + * At runtime, the nop is replaced with bl to enable the ftrace call and
> + * the bl is replaced with nop to disable it. The bl clobbers RA, so the
> + * original RA value is saved in t0 first.
> + * The details are:
> + *
> + * | Compiled   | Disabled               | Enabled                |
> + * +------------+------------------------+------------------------+
> + * | nop        | move    t0, ra         | move    t0, ra         |
> + * | nop        | nop                    | bl      ftrace_caller  |
> + * | func_body  | func_body              | func_body              |
> + *
> + * The original RA is saved by ftrace_regs_entry and restored into RA
> + * before returning to the regular function prologue. When a function is
> + * not being traced, the "move t0, ra" is harmless.
> + */
> +
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
> +{
> + unsigned long pc;
> + u32 old, new;
> +
> + pc = rec->ip;
> + old = larch_insn_gen_nop();
> + new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
> +
> + return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +{
> + unsigned long pc;
> + u32 old, new;
> +
> + pc = rec->ip + LOONGARCH_INSN_SIZE;
> +
> + old = larch_insn_gen_nop();
> + new = larch_insn_gen_bl(pc, addr);
> +
> + return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
> + unsigned long addr)
> +{
> + unsigned long pc;
> + u32 old, new;
> +
> + pc = rec->ip + LOONGARCH_INSN_SIZE;
> +
> + new = larch_insn_gen_nop();
> + old = larch_insn_gen_bl(pc, addr);
> +
> + return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +void arch_ftrace_update_code(int command)
> +{
> + command |= FTRACE_MAY_SLEEP;
> + ftrace_modify_all_code(command);
> +}
> +
> +int __init ftrace_dyn_arch_init(void)
> +{
> + return 0;
> +}
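With these hooks in place, the generic ftrace core should drive the whole
patching path with no extra arch glue. A minimal (untested) module sketch of
what exercises ftrace_make_call()/ftrace_make_nop() here; the filtered
function name is just an example:

    #include <linux/ftrace.h>
    #include <linux/module.h>
    #include <linux/string.h>

    static void my_tracer(unsigned long ip, unsigned long parent_ip,
                          struct ftrace_ops *op, struct ftrace_regs *fregs)
    {
            /* called through ftrace_caller once the bl is patched in */
    }

    static struct ftrace_ops my_ops = {
            .func = my_tracer,
    };

    static int __init my_init(void)
    {
            /* narrow the records; registering then patches in the bl */
            ftrace_set_filter(&my_ops, "schedule", strlen("schedule"), 0);
            return register_ftrace_function(&my_ops);
    }

    static void __exit my_exit(void)
    {
            unregister_ftrace_function(&my_ops); /* call sites back to nop */
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");
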
> diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
> index b1df0ec34bd1..d62cdf4a9ffb 100644
> --- a/arch/loongarch/kernel/inst.c
> +++ b/arch/loongarch/kernel/inst.c
> @@ -2,8 +2,83 @@
> /*
> * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> */
> +#include <linux/sizes.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
> #include <asm/inst.h>
>
> +static DEFINE_RAW_SPINLOCK(patch_lock);
> +
> +int larch_insn_read(void *addr, u32 *insnp)
> +{
> + int ret;
> + u32 val;
> +
> + ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE);
> + if (!ret)
> + *insnp = val;
> +
> + return ret;
> +}
> +
> +int larch_insn_write(void *addr, u32 insn)
> +{
> + int ret;
> + unsigned long flags = 0;
> +
> + raw_spin_lock_irqsave(&patch_lock, flags);
> + ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE);
> + raw_spin_unlock_irqrestore(&patch_lock, flags);
> +
> + return ret;
> +}
> +
> +int larch_insn_patch_text(void *addr, u32 insn)
> +{
> + int ret;
> + u32 *tp = addr;
> +
> + if ((unsigned long)tp & 3)
> + return -EINVAL;
> +
> + ret = larch_insn_write(tp, insn);
> + if (!ret)
> + flush_icache_range((unsigned long)tp,
> + (unsigned long)tp + LOONGARCH_INSN_SIZE);
> +
> + return ret;
> +}
> +
> +u32 larch_insn_gen_nop(void)
> +{
> + return INSN_NOP;
> +}
> +
> +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
> +{
> + unsigned int immediate_l, immediate_h;
> + union loongarch_instruction insn;
> + long offset = dest - pc;
> +
> + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
> + pr_warn("The generated bl instruction is out of range.\n");
> + return INSN_BREAK;
> + }
> +
> + offset >>= 2;
> +
> + immediate_l = offset & 0xffff;
> + offset >>= 16;
> + immediate_h = offset & 0x3ff;
> +
> + insn.reg0i26_format.opcode = bl_op;
> + insn.reg0i26_format.immediate_l = immediate_l;
> + insn.reg0i26_format.immediate_h = immediate_h;
> +
> + return insn.word;
> +}
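A user-space sketch to sanity-check the immediate split, assuming the I26
layout (offs[15:0] in bits 25:10, offs[25:16] in bits 9:0) and bl_op ==
0b010101; treat the constants as my assumptions, not authoritative:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t gen_bl(uint64_t pc, uint64_t dest)
    {
            int64_t offset = (int64_t)(dest - pc) >> 2;  /* word offset */
            uint32_t imm_l = offset & 0xffff;            /* offs[15:0]  */
            uint32_t imm_h = (offset >> 16) & 0x3ff;     /* offs[25:16] */

            return (0x15u << 26) | (imm_l << 10) | imm_h;
    }

    int main(void)
    {
            /* target 0x2000 bytes ahead: offs26 = 0x800 */
            assert(gen_bl(0x1000, 0x3000) == 0x54200000);
            return 0;
    }
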
> +
> u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
> {
> union loongarch_instruction insn;
> @@ -38,3 +113,20 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned l
>
> return insn.word;
> }
> +
> +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk)
> +{
> + union loongarch_instruction insn;
> +
> + insn.reg3_format.opcode = or_op;
> + insn.reg3_format.rd = rd;
> + insn.reg3_format.rj = rj;
> + insn.reg3_format.rk = rk;
> +
> + return insn.word;
> +}
> +
> +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
> +{
> + return larch_insn_gen_or(rd, rj, 0);
> +}
> diff --git a/arch/loongarch/kernel/mcount-dyn.S b/arch/loongarch/kernel/mcount-dyn.S
> new file mode 100644
> index 000000000000..205925bc3822
> --- /dev/null
> +++ b/arch/loongarch/kernel/mcount-dyn.S
> @@ -0,0 +1,89 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <asm/export.h>
> +#include <asm/regdef.h>
> +#include <asm/stackframe.h>
> +#include <asm/ftrace.h>
> +
> + .text
> +/*
> + * Due to -fpatchable-function-entry=2, the compiler inserts 2 NOPs before the
> + * regular C function prologue. When PC arrives here, the last 2 executed
> + * instructions are as follows:
> + *   move t0, ra
> + *   bl   callsite (for modules, callsite is a trampoline)
> + *
> + * The module trampoline is as follows:
> + *   lu12i.w t1, callsite[31:12]
> + *   lu32i.d t1, callsite[51:32]
> + *   lu52i.d t1, t1, callsite[63:52]
> + *   jirl    zero, t1, callsite[11:0] >> 2
> + *
> + * See arch/loongarch/kernel/ftrace_dyn.c for details. Note that the T series
> + * regs are available and safe to use here because each C function follows the
> + * LoongArch psABI.
> + */
> +
> + .macro ftrace_regs_entry
> + PTR_ADDI sp, sp, -PT_SIZE
> + /* Save trace function ra at PT_ERA */
> + PTR_S ra, sp, PT_ERA
> + /* Save parent ra at PT_R1(RA) */
> + PTR_S t0, sp, PT_R1
> + PTR_S a0, sp, PT_R4
> + PTR_S a1, sp, PT_R5
> + PTR_S a2, sp, PT_R6
> + PTR_S a3, sp, PT_R7
> + PTR_S a4, sp, PT_R8
> + PTR_S a5, sp, PT_R9
> + PTR_S a6, sp, PT_R10
> + PTR_S a7, sp, PT_R11
> + PTR_S fp, sp, PT_R22
> +
> + PTR_ADDI t8, sp, PT_SIZE
> + PTR_S t8, sp, PT_R3
> +
> + .endm
> +
> +SYM_CODE_START(ftrace_caller)
> + ftrace_regs_entry
> + b ftrace_common
> +SYM_CODE_END(ftrace_caller)
> +
> +SYM_CODE_START(ftrace_common)
> + PTR_ADDI a0, ra, -8 /* arg0: ip */
> + move a1, t0 /* arg1: parent_ip */
> + la.pcrel t1, function_trace_op
> + PTR_L a2, t1, 0 /* arg2: op */
> + move a3, sp /* arg3: regs */
> + .globl ftrace_call
> +ftrace_call:
> + bl ftrace_stub
> +/*
> + * As we don't use S series regs in this assembly code, and all callees
> + * are C functions which save the S series regs themselves, there is no
> + * need to restore them. The T series regs are available and safe at the
> + * callsite, so there is no need to restore them either.
> + */
> +ftrace_common_return:
> + PTR_L a0, sp, PT_R4
> + PTR_L a1, sp, PT_R5
> + PTR_L a2, sp, PT_R6
> + PTR_L a3, sp, PT_R7
> + PTR_L a4, sp, PT_R8
> + PTR_L a5, sp, PT_R9
> + PTR_L a6, sp, PT_R10
> + PTR_L a7, sp, PT_R11
> + PTR_L fp, sp, PT_R22
> + PTR_L ra, sp, PT_R1
> + PTR_L t0, sp, PT_ERA
> + PTR_ADDI sp, sp, PT_SIZE
> + jr t0
> +SYM_CODE_END(ftrace_common)
> +
> +SYM_FUNC_START(ftrace_stub)
> + jr ra
> +SYM_FUNC_END(ftrace_stub)
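For reference, the a0-a3 set up in ftrace_common map one-to-one onto the
generic callback; my annotation below (ra points just past the second
patched instruction, which is why ip = ra - 8 is the function entry):

    /* a0 = ra - 8 : ip        -> entry of the traced function
     * a1 = t0     : parent_ip -> the traced function's caller (saved RA)
     * a2          : op        -> loaded from function_trace_op
     * a3 = sp     : regs      -> the pt_regs built by ftrace_regs_entry
     */
    typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
                                  struct ftrace_ops *op,
                                  struct ftrace_regs *fregs);
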
> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
> index b206d9159205..c5df4ae73e0d 100644
> --- a/arch/loongarch/kernel/unwind_prologue.c
> +++ b/arch/loongarch/kernel/unwind_prologue.c
> @@ -13,9 +13,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
>
> if (unwind_done(state))
> return 0;
> - else if (state->type)
> - return state->pc;
> - else if (state->first)
> + else if (state->type || state->first)
> return state->pc;
>
> return *(unsigned long *)(state->sp);
> @@ -39,16 +37,41 @@ static bool unwind_by_guess(struct unwind_state *state)
> return false;
> }
>
> +static inline void unwind_state_fixup(struct unwind_state *state)
> +{
> +#ifdef CONFIG_DYNAMIC_FTRACE
> + static unsigned long ftrace_case = (unsigned long)ftrace_call + 4;
> +
> + if (state->pc == ftrace_case)
> + state->is_ftrace = true;
> +#endif
> +}
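The layout that makes this work, as I read it: when a saved return address
equals ftrace_call + 4 (the instruction after the patched bl in
ftrace_common), the next frame to unwind is ftrace_common's, and state->sp
is then exactly the base of its pt_regs block:

    |  traced function's frame  |  <- regs->regs[3] (its original sp)
    +---------------------------+
    |  pt_regs (PT_SIZE)        |  <- state->sp when is_ftrace is set
    +---------------------------+

So unwind_by_prologue() can pull pc/ra/sp straight out of pt_regs instead
of doing prologue analysis on assembly that has no C prologue.
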
> +
> static bool unwind_by_prologue(struct unwind_state *state)
> {
> struct stack_info *info = &state->stack_info;
> union loongarch_instruction *ip, *ip_end;
> unsigned long frame_size = 0, frame_ra = -1;
> unsigned long size, offset, pc = state->pc;
> + struct pt_regs *regs;
>
> if (state->sp >= info->end || state->sp < info->begin)
> return false;
>
> + if (state->is_ftrace) {
> + /*
> + * As we meet the frame built by ftrace_regs_entry, reset the first
> + * flag as if we were just starting to trace. Prologue analysis will
> + * stop soon because PC is at the entry.
> + */
> + regs = (struct pt_regs *)state->sp;
> + state->pc = regs->csr_era;
> + state->ra = regs->regs[1];
> + state->sp = regs->regs[3];
> + state->first = true;
> + state->is_ftrace = false;
> + return true;
> + }
> +
> if (!kallsyms_lookup_size_offset(pc, &size, &offset))
> return false;
>
> @@ -94,7 +117,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
>
> state->pc = *(unsigned long *)(state->sp + frame_ra);
> state->sp = state->sp + frame_size;
> - return !!__kernel_text_address(state->pc);
> + goto out;
>
> first:
> state->first = false;
> @@ -103,7 +126,9 @@ static bool unwind_by_prologue(struct unwind_state *state)
>
> state->pc = state->ra;
>
> - return !!__kernel_text_address(state->ra);
> +out:
> + unwind_state_fixup(state);
> + return !!__kernel_text_address(state->pc);
> }
>
> void unwind_start(struct unwind_state *state, struct task_struct *task,
> --
> 2.36.0
>
>