lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAhV-H41XxvVhtkL3WhHH_iFVWu1E28dPNZAbNCOgtrzfrZmpQ@mail.gmail.com>
Date:   Tue, 15 Nov 2022 18:54:53 +0800
From:   Huacai Chen <chenhuacai@...nel.org>
To:     Qing Zhang <zhangqing@...ngson.cn>
Cc:     Steven Rostedt <rostedt@...dmis.org>,
        Ingo Molnar <mingo@...hat.com>, loongarch@...ts.linux.dev,
        linux-kernel@...r.kernel.org, Jinyang He <hejinyang@...ngson.cn>
Subject: Re: [PATCH v6 3/9] LoongArch/ftrace: Add dynamic function tracer support

Hi, Qing,

On Tue, Nov 15, 2022 at 10:55 AM Qing Zhang <zhangqing@...ngson.cn> wrote:
>
> The compiler has inserted 2 NOPs before the regular function prologue.
> T series registers are available and safe because of LoongArch psABI.
>
> At runtime, replace nop with bl to enable ftrace call and replace bl with
> nop to disable ftrace call. The bl requires us to save the original RA
> value, so here it saves RA at t0.
> details are:
>
> | Compiled   |       Disabled         |        Enabled         |
> +------------+------------------------+------------------------+
> | nop        | move     t0, ra        | move     t0, ra        |
> | nop        | nop                    | bl      ftrace_caller  |
> | func_body  | func_body              | func_body              |
>
> The RA value will be recovered by ftrace_regs_entry, and restored into RA
> before returning to the regular function prologue. When a function is not
> being traced, the move t0, ra is not harmful.
>
> 1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c)
>    The two functions turn each recorded call site of filtered functions
>    into a call to ftrace_caller or nops.
>
> 2) ftracce_update_ftrace_func (in kernel/ftrace.c)
>    turns the nops at ftrace_call into a call to a generic entry for
>    function tracers.
>
> 3) ftrace_caller (in kernel/mcount-dyn.S)
>    The entry where each _mcount call sites calls to once they are
>    filtered to be traced.
>
> Co-developed-by: Jinyang He <hejinyang@...ngson.cn>
> Signed-off-by: Jinyang He <hejinyang@...ngson.cn>
> Signed-off-by: Qing Zhang <zhangqing@...ngson.cn>
> ---
>  arch/loongarch/Kconfig                  |   1 +
>  arch/loongarch/include/asm/ftrace.h     |  16 ++++
>  arch/loongarch/include/asm/inst.h       |  15 ++++
>  arch/loongarch/include/asm/unwind.h     |   2 +-
>  arch/loongarch/kernel/Makefile          |   5 ++
>  arch/loongarch/kernel/ftrace_dyn.c      | 111 ++++++++++++++++++++++++
>  arch/loongarch/kernel/inst.c            |  92 ++++++++++++++++++++
>  arch/loongarch/kernel/mcount-dyn.S      |  89 +++++++++++++++++++
>  arch/loongarch/kernel/unwind_prologue.c |  35 ++++++--
>  9 files changed, 360 insertions(+), 6 deletions(-)
>  create mode 100644 arch/loongarch/kernel/ftrace_dyn.c
>  create mode 100644 arch/loongarch/kernel/mcount-dyn.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index e6b1defca1f1..615ce62422b8 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -89,6 +89,7 @@ config LOONGARCH
>         select HAVE_C_RECORDMCOUNT
>         select HAVE_DEBUG_STACKOVERFLOW
>         select HAVE_DMA_CONTIGUOUS
> +       select HAVE_DYNAMIC_FTRACE
>         select HAVE_EBPF_JIT
>         select HAVE_EXIT_THREAD
>         select HAVE_FAST_GUP
> diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
> index 6a3e76234618..76ca58767f4d 100644
> --- a/arch/loongarch/include/asm/ftrace.h
> +++ b/arch/loongarch/include/asm/ftrace.h
> @@ -10,9 +10,25 @@
>  #define MCOUNT_INSN_SIZE 4             /* sizeof mcount call */
>
>  #ifndef __ASSEMBLY__
> +#ifndef CONFIG_DYNAMIC_FTRACE
>  extern void _mcount(void);
>  #define mcount _mcount
> +#endif
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +static inline unsigned long ftrace_call_adjust(unsigned long addr)
> +{
> +       return addr;
> +}
> +
> +struct dyn_arch_ftrace {
> +};
> +
> +struct dyn_ftrace;
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> +#define ftrace_init_nop ftrace_init_nop
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE */
>  #endif /* __ASSEMBLY__ */
>  #endif /* CONFIG_FUNCTION_TRACER */
>  #endif /* _ASM_LOONGARCH_FTRACE_H */
> diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> index a52913787183..0ec775d39ca5 100644
> --- a/arch/loongarch/include/asm/inst.h
> +++ b/arch/loongarch/include/asm/inst.h
> @@ -11,6 +11,9 @@
>  #define INSN_NOP               0x03400000
>  #define INSN_BREAK             0x002a0000
>
> +#define INSN_NOP 0x03400000
> +#define INSN_BREAK 0x002a0000
They are already defined.

Huacai
> +
>  #define ADDR_IMMMASK_LU52ID    0xFFF0000000000000
>  #define ADDR_IMMMASK_LU32ID    0x000FFFFF00000000
>  #define ADDR_IMMMASK_ADDU16ID  0x00000000FFFF0000
> @@ -349,6 +352,18 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
>                 is_imm12_negative(ip->reg2i12_format.immediate);
>  }
>
> +int larch_insn_read(void *addr, u32 *insnp);
> +int larch_insn_write(void *addr, u32 insn);
> +int larch_insn_patch_text(void *addr, u32 insn);
> +
> +u32 larch_insn_gen_nop(void);
> +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
> +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
> +
> +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj,
> +                       enum loongarch_gpr rk);
> +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
> +
>  u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
>  u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
>  u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
> diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
> index 6af4718bdf01..a51eec00efb8 100644
> --- a/arch/loongarch/include/asm/unwind.h
> +++ b/arch/loongarch/include/asm/unwind.h
> @@ -20,7 +20,7 @@ struct unwind_state {
>         char type; /* UNWINDER_XXX */
>         struct stack_info stack_info;
>         struct task_struct *task;
> -       bool first, error;
> +       bool first, error, is_ftrace;
>         unsigned long sp, pc, ra;
>  };
>
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 3f71bce1c7ce..c5e2bfd8247d 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -16,8 +16,13 @@ obj-$(CONFIG_EFI)            += efi.o
>  obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o
>
>  ifdef CONFIG_FUNCTION_TRACER
> +ifndef CONFIG_DYNAMIC_FTRACE
>  obj-y += mcount.o ftrace.o
>  CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
> +else
> +obj-y += mcount-dyn.o ftrace_dyn.o
> +CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE)
> +endif
>  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
>  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
>  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
> diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
> new file mode 100644
> index 000000000000..1f8955be8b64
> --- /dev/null
> +++ b/arch/loongarch/kernel/ftrace_dyn.c
> @@ -0,0 +1,111 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Based on arch/arm64/kernel/ftrace.c
> + *
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/ftrace.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/inst.h>
> +
> +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
> +                             bool validate)
> +{
> +       u32 replaced;
> +
> +       if (validate) {
> +               if (larch_insn_read((void *)pc, &replaced))
> +                       return -EFAULT;
> +
> +               if (replaced != old)
> +                       return -EINVAL;
> +       }
> +
> +       if (larch_insn_patch_text((void *)pc, new))
> +               return -EPERM;
> +
> +       return 0;
> +}
> +
> +int ftrace_update_ftrace_func(ftrace_func_t func)
> +{
> +       unsigned long pc;
> +       u32 new;
> +
> +       pc = (unsigned long)&ftrace_call;
> +       new = larch_insn_gen_bl(pc, (unsigned long)func);
> +
> +       return ftrace_modify_code(pc, 0, new, false);
> +}
> +
> +/*
> + * The compiler has inserted 2 NOPs before the regular function prologue.
> + * T series registers are available and safe because of LoongArch psABI.
> + *
> + * At runtime, replace nop with bl to enable ftrace call and replace bl with
> + * nop to disable ftrace call. The bl requires us to save the original RA value,
> + * so here it saves RA at t0.
> + * details are:
> + *
> + * | Compiled   |       Disabled         |        Enabled         |
> + * +------------+------------------------+------------------------+
> + * | nop        | move     t0, ra        | move     t0, ra        |
> + * | nop        | nop                    | bl      ftrace_caller  |
> + * | func_body  | func_body              | func_body              |
> + *
> + * The RA value will be recovered by ftrace_regs_entry, and restored into RA
> + * before returning to the regular function prologue. When a function is not
> + * being traced, the move t0, ra is not harmful.
> + */
> +
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
> +{
> +       unsigned long pc;
> +       u32 old, new;
> +
> +       pc = rec->ip;
> +       old = larch_insn_gen_nop();
> +       new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
> +
> +       return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +{
> +       unsigned long pc;
> +       u32 old, new;
> +
> +       pc = rec->ip + LOONGARCH_INSN_SIZE;
> +
> +       old = larch_insn_gen_nop();
> +       new = larch_insn_gen_bl(pc, addr);
> +
> +       return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
> +                   unsigned long addr)
> +{
> +       unsigned long pc;
> +       u32 old, new;
> +
> +       pc = rec->ip + LOONGARCH_INSN_SIZE;
> +
> +       new = larch_insn_gen_nop();
> +       old = larch_insn_gen_bl(pc, addr);
> +
> +       return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +void arch_ftrace_update_code(int command)
> +{
> +       command |= FTRACE_MAY_SLEEP;
> +       ftrace_modify_all_code(command);
> +}
> +
> +int __init ftrace_dyn_arch_init(void)
> +{
> +       return 0;
> +}
> diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
> index b1df0ec34bd1..d62cdf4a9ffb 100644
> --- a/arch/loongarch/kernel/inst.c
> +++ b/arch/loongarch/kernel/inst.c
> @@ -2,8 +2,83 @@
>  /*
>   * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
>   */
> +#include <linux/sizes.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
>  #include <asm/inst.h>
>
> +static DEFINE_RAW_SPINLOCK(patch_lock);
> +
> +int larch_insn_read(void *addr, u32 *insnp)
> +{
> +       int ret;
> +       u32 val;
> +
> +       ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE);
> +       if (!ret)
> +               *insnp = val;
> +
> +       return ret;
> +}
> +
> +int larch_insn_write(void *addr, u32 insn)
> +{
> +       int ret;
> +       unsigned long flags = 0;
> +
> +       raw_spin_lock_irqsave(&patch_lock, flags);
> +       ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE);
> +       raw_spin_unlock_irqrestore(&patch_lock, flags);
> +
> +       return ret;
> +}
> +
> +int larch_insn_patch_text(void *addr, u32 insn)
> +{
> +       int ret;
> +       u32 *tp = addr;
> +
> +       if ((unsigned long)tp & 3)
> +               return -EINVAL;
> +
> +       ret = larch_insn_write(tp, insn);
> +       if (!ret)
> +               flush_icache_range((unsigned long)tp,
> +                                  (unsigned long)tp + LOONGARCH_INSN_SIZE);
> +
> +       return ret;
> +}
> +
> +u32 larch_insn_gen_nop(void)
> +{
> +       return INSN_NOP;
> +}
> +
> +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
> +{
> +       unsigned int immediate_l, immediate_h;
> +       union loongarch_instruction insn;
> +       long offset = dest - pc;
> +
> +       if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
> +               pr_warn("The generated bl instruction is out of range.\n");
> +               return INSN_BREAK;
> +       }
> +
> +       offset >>= 2;
> +
> +       immediate_l = offset & 0xffff;
> +       offset >>= 16;
> +       immediate_h = offset & 0x3ff;
> +
> +       insn.reg0i26_format.opcode = bl_op;
> +       insn.reg0i26_format.immediate_l = immediate_l;
> +       insn.reg0i26_format.immediate_h = immediate_h;
> +
> +       return insn.word;
> +}
> +
>  u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
>  {
>         union loongarch_instruction insn;
> @@ -38,3 +113,20 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned l
>
>         return insn.word;
>  }
> +
> +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk)
> +{
> +       union loongarch_instruction insn;
> +
> +       insn.reg3_format.opcode = or_op;
> +       insn.reg3_format.rd = rd;
> +       insn.reg3_format.rj = rj;
> +       insn.reg3_format.rk = rk;
> +
> +       return insn.word;
> +}
> +
> +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
> +{
> +       return larch_insn_gen_or(rd, rj, 0);
> +}
> diff --git a/arch/loongarch/kernel/mcount-dyn.S b/arch/loongarch/kernel/mcount-dyn.S
> new file mode 100644
> index 000000000000..205925bc3822
> --- /dev/null
> +++ b/arch/loongarch/kernel/mcount-dyn.S
> @@ -0,0 +1,89 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <asm/export.h>
> +#include <asm/regdef.h>
> +#include <asm/stackframe.h>
> +#include <asm/ftrace.h>
> +
> +       .text
> +/*
> + * Due to -fpatchable-function-entry=2: the compiler inserted 2 NOPs before the
> + * regular C function prologue. When PC arrived here, the last 2 instructions
> + * as follows,
> + *     move            t0, ra
> + *     bl              callsite (for modules, callsite is a tramplione)
> + *
> + * modules tramplione as follows,
> + *     lu12i.w         t1, callsite[31:12]
> + *     lu32i.d         t1, callsite[51:32]
> + *     lu52i.d         t1, t1, callsite[63:52]
> + *     jirl            zero, t1, callsite[11:0] >> 2
> + *
> + * See arch/loongarch/kernel/ftrace_dyn.c for details. Here, pay attention to
> + * that the T series regs are available and safe because each C functions
> + * follows the LoongArch psABI well.
> + */
> +
> +       .macro  ftrace_regs_entry
> +       PTR_ADDI sp, sp, -PT_SIZE
> +       /* Save trace function ra at PT_ERA */
> +       PTR_S   ra, sp, PT_ERA
> +       /* Save parent ra at PT_R1(RA) */
> +       PTR_S   t0, sp, PT_R1
> +       PTR_S   a0, sp, PT_R4
> +       PTR_S   a1, sp, PT_R5
> +       PTR_S   a2, sp, PT_R6
> +       PTR_S   a3, sp, PT_R7
> +       PTR_S   a4, sp, PT_R8
> +       PTR_S   a5, sp, PT_R9
> +       PTR_S   a6, sp, PT_R10
> +       PTR_S   a7, sp, PT_R11
> +       PTR_S   fp, sp, PT_R22
> +
> +       PTR_ADDI t8, sp, PT_SIZE
> +       PTR_S   t8, sp, PT_R3
> +
> +       .endm
> +
> +SYM_CODE_START(ftrace_caller)
> +       ftrace_regs_entry
> +       b       ftrace_common
> +SYM_CODE_END(ftrace_caller)
> +
> +SYM_CODE_START(ftrace_common)
> +       PTR_ADDI        a0, ra, -8      /* arg0: ip */
> +       move            a1, t0          /* arg1: parent_ip */
> +       la.pcrel        t1, function_trace_op
> +       PTR_L           a2, t1, 0       /* arg2: op */
> +       move            a3, sp          /* arg3: regs */
> +       .globl ftrace_call
> +ftrace_call:
> +       bl              ftrace_stub
> +/*
> + * As we didn't use S series regs in this assmembly code and all calls
> + * are C function which will save S series regs by themselves, there is
> + * no need to restore S series regs. The T series is available and safe
> + * at the callsite, so there is no need to restore the T series regs.
> + */
> +ftrace_common_return:
> +       PTR_L   a0, sp, PT_R4
> +       PTR_L   a1, sp, PT_R5
> +       PTR_L   a2, sp, PT_R6
> +       PTR_L   a3, sp, PT_R7
> +       PTR_L   a4, sp, PT_R8
> +       PTR_L   a5, sp, PT_R9
> +       PTR_L   a6, sp, PT_R10
> +       PTR_L   a7, sp, PT_R11
> +       PTR_L   fp, sp, PT_R22
> +       PTR_L   ra, sp, PT_R1
> +       PTR_L   t0, sp, PT_ERA
> +       PTR_ADDI sp, sp, PT_SIZE
> +       jr      t0
> +SYM_CODE_END(ftrace_common)
> +
> +SYM_FUNC_START(ftrace_stub)
> +       jr      ra
> +SYM_FUNC_END(ftrace_stub)
> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
> index b206d9159205..c5df4ae73e0d 100644
> --- a/arch/loongarch/kernel/unwind_prologue.c
> +++ b/arch/loongarch/kernel/unwind_prologue.c
> @@ -13,9 +13,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
>
>         if (unwind_done(state))
>                 return 0;
> -       else if (state->type)
> -               return state->pc;
> -       else if (state->first)
> +       else if (state->type || state->first)
>                 return state->pc;
>
>         return *(unsigned long *)(state->sp);
> @@ -39,16 +37,41 @@ static bool unwind_by_guess(struct unwind_state *state)
>         return false;
>  }
>
> +static inline void unwind_state_fixup(struct unwind_state *state)
> +{
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +       static unsigned long ftrace_case = (unsigned long)ftrace_call + 4;
> +
> +       if (state->pc == ftrace_case)
> +               state->is_ftrace = true;
> +#endif
> +}
> +
>  static bool unwind_by_prologue(struct unwind_state *state)
>  {
>         struct stack_info *info = &state->stack_info;
>         union loongarch_instruction *ip, *ip_end;
>         unsigned long frame_size = 0, frame_ra = -1;
>         unsigned long size, offset, pc = state->pc;
> +       struct pt_regs *regs;
>
>         if (state->sp >= info->end || state->sp < info->begin)
>                 return false;
>
> +       if (state->is_ftrace) {
> +               /*
> +                * As we meet ftrace_regs_entry, reset first flag like first doing
> +                * tracing, Prologue analysis will stop soon because PC is at entry.
> +                */
> +               regs = (struct pt_regs *)state->sp;
> +               state->pc = regs->csr_era;
> +               state->ra = regs->regs[1];
> +               state->sp = regs->regs[3];
> +               state->first = true;
> +               state->is_ftrace = false;
> +               return true;
> +       }
> +
>         if (!kallsyms_lookup_size_offset(pc, &size, &offset))
>                 return false;
>
> @@ -94,7 +117,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
>
>         state->pc = *(unsigned long *)(state->sp + frame_ra);
>         state->sp = state->sp + frame_size;
> -       return !!__kernel_text_address(state->pc);
> +       goto out;
>
>  first:
>         state->first = false;
> @@ -103,7 +126,9 @@ static bool unwind_by_prologue(struct unwind_state *state)
>
>         state->pc = state->ra;
>
> -       return !!__kernel_text_address(state->ra);
> +out:
> +       unwind_state_fixup(state);
> +       return !!__kernel_text_address(state->pc);
>  }
>
>  void unwind_start(struct unwind_state *state, struct task_struct *task,
> --
> 2.36.0
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ