Message-ID: <YmFgVbPuv1criVBq@lakrids>
Date: Thu, 21 Apr 2022 14:47:01 +0100
From: Mark Rutland <mark.rutland@....com>
To: Wang ShaoBo <bobo.shaobowang@...wei.com>
Cc: cj.chengjian@...wei.com, huawei.libin@...wei.com,
xiexiuqi@...wei.com, liwei391@...wei.com,
linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
catalin.marinas@....com, will@...nel.org, rostedt@...dmis.org,
zengshun.wu@...look.com
Subject: Re: [RFC PATCH -next v2 4/4] arm64/ftrace: implement long jump for dynamic trampolines
On Wed, Mar 16, 2022 at 06:01:32PM +0800, Wang ShaoBo wrote:
> From: Cheng Jian <cj.chengjian@...wei.com>
>
> When KASLR is enabled, a long jump should be implemented for jumping
> to the corresponding trampoline and to the callback from this trampoline.
I believe this is also potentially a problem with some debug
configurations, even without KASLR (e.g. I have built a ~900MiB Image in
the past), so we *might* need this more generally, even if it is
unusual.
> Firstly, we use -fpatchable-function-entry=5,3 to reserve 3 NOPs before
> each function's entry, and also 2 NOPs at the head as 3b23e4991fb6
> ("arm64: implement ftrace with regs") does, so rec->ip should still be
> the function entry address + 4 after this change, provided no extra
> instruction (such as BTI) is placed at the head.
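Just to check my understanding: with =5,3 (and no BTI at the head), each
callsite would look roughly like the sketch below, with the three leading
NOPs reserved for the ADRP/ADD/BR sequence (offsets are illustrative):

	NOP			// func - 12 \
	NOP			// func - 8   > reserved for ADRP / ADD / BR X16
	NOP			// func - 4  /
  func:
	NOP			// func + 0, patched to `MOV X9, X30`
	NOP			// func + 4, patched to `BL <...>`, i.e. rec->ip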
>
> But should note this will increase the size of the text section:
> //Using defconfig with -fpatchable-function-entry=5,3://
> text data bss dec hex filename
> 27095761 14578580 533268 42207609 2840979 vmlinux
>
> //Using defconfig with -fpatchable-function-entry=2://
> text data bss dec hex filename
> 26394597 14575988 533268 41503853 2794c6d vmlinux
So that's ~687KiB for a ~40M kernel.
> When registering a new ftrace function, we put the registered ftrace_ops
> into a dynamically allocated trampoline rather than onto the ftrace_ops_list,
>
> .text section:
> funcA: | funcA(): funcB(): |
> `-> +-----+ | | ... mov x9 x30 |
> | ... | | | adrp <- bl <> |
> | nop | | | add
> | nop | | | br x16 ---+
> funcB | nop | | | ftrace_(regs_)caller_tramp:
> `-> +-----+ | `--> +---------------------+
> | nop | | | ... |
> | nop | | ftrace callsite +---------------------+
> | ... | | `-----> | PLT entry: |
> | nop | | | adrp |
> | nop | | | add |
> funcC: | nop | | ftrace graph callsite | br x16 |
> `-> +-----+ | `-----> +---------------------+
> | nop | | | ... |
> | nop | | +---------------------+
>
> There are three steps to create this connection when registering
> a ftrace_ops for a function to be traced (take funcB as an example):
>
> Step 1: use make_init_nop() to generate 'mov x9 x30' for LR reservation.
> funcB:
> | mov x9 x30
> | nop //rec->ip
> | ...
So this is the same as today.
> Step 2: create trampoline and use ftrace_make_call() to update plt entry:
> funcA:
> | ...
> | adrp
> | add
> | br x16 //pointer to trampoline allocated
>
> Step 3: fill the plt entry in the trampoline containing the ftrace_ops (addr stored in x2):
> trampoline:
> | ...
> | adrp
> | add
> | br x16 //pointer to callback handler
>
> At the end, atomically replace the nop with a bl in funcB to enable profiling.
> funcB:
> | mov x9 x30
> | bl <> //to first adrp in funcA
> | ...
... but these steps have a bunch of work which cannot be done
atomically, which worries me, because it means we can't safely modify a
patch-site without some *very* expensive global synchronization. I
believe we need some synchronization to safely clean up the dynamic
trampolines themselves, but I don't know what form that takes today --
how are those freed safely?
When I last looked at this, I'd assumed we'd need a 6-instruction
patch-site (and force function alignment) so that we could have a 64-bit
value that can be patched atomically, e.g.
	NOP
	NOP
	NOP
	NOP
  func:
	(BTI)
	NOP
	NOP
	...
... which we could split up as:
  __trampoline_ptr:
	.quad	<trampoline_addr>
  __pre_patch:
	LDR	X16, __trampoline_ptr
	BLR	X16		// or `B __post_patch` if disabled
  func:
	(BTI)
	MOV	X9, X30
	B	__pre_patch	// or `NOP` if disabled
  __post_patch:
	...
... and the trampoline would have to adjust the X30 it receives as
that'd point to `func`, and it needs to point to `__post_patch` to
return back into the function.
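As a rough sketch (assuming no BTI, so `__post_patch` is `func` + 8; the
label name and offsets below are purely illustrative), that fix-up at the
head of the trampoline might look like:

  ftrace_caller_tramp:
	ADD	X30, X30, #8	// X30 arrived pointing at `func`;
				// make it point at `__post_patch`
				// (would be #12 with a BTI present)
	...			// usual regs save / ops handling follows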
Thanks,
Mark.
> If funcB is bound to another ftrace_ops, Step 2 and Step 3 will be repeated.
> If funcB is bound to multiple ftrace_ops, this trampoline will not be used;
> instead funcB will jump to ftrace_(regs_)caller.
>
> Signed-off-by: Cheng Jian <cj.chengjian@...wei.com>
> Signed-off-by: Wang ShaoBo <bobo.shaobowang@...wei.com>
> ---
> arch/arm64/Makefile | 2 +-
> arch/arm64/include/asm/ftrace.h | 2 +-
> arch/arm64/include/asm/module.h | 9 +++
> arch/arm64/kernel/entry-ftrace.S | 8 ++
> arch/arm64/kernel/ftrace.c | 127 +++++++++++++++++--------------
> arch/arm64/kernel/module-plts.c | 50 ++++++++++++
> 6 files changed, 140 insertions(+), 58 deletions(-)
>
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 2f1de88651e6..3139d6d9ee52 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -130,7 +130,7 @@ CHECKFLAGS += -D__aarch64__
>
> ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
> KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> - CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> + CC_FLAGS_FTRACE := -fpatchable-function-entry=5,3
> endif
>
> # Default value
> diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
> index 1494cfa8639b..d0562c13b1dc 100644
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -70,7 +70,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
> * See ftrace_init_nop() for the callsite sequence.
> */
> if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
> - return addr + AARCH64_INSN_SIZE;
> + return addr + AARCH64_INSN_SIZE + sizeof(struct plt_entry);
> /*
> * addr is the address of the mcount call instruction.
> * recordmcount does the necessary offset calculation.
> diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
> index 4e7fa2623896..30f5b1cd5c9c 100644
> --- a/arch/arm64/include/asm/module.h
> +++ b/arch/arm64/include/asm/module.h
> @@ -36,6 +36,14 @@ extern u64 module_alloc_base;
> #define module_alloc_base ((u64)_etext - MODULES_VSIZE)
> #endif
>
> +enum {
> + PLT_MAKE_NOP,
> + PLT_MAKE_CALL,
> + PLT_MAKE_CALL_LINK,
> + PLT_MAKE_LCALL,
> + PLT_MAKE_LCALL_LINK
> +};
> +
> struct plt_entry {
> /*
> * A program that conforms to the AArch64 Procedure Call Standard
> @@ -58,6 +66,7 @@ static inline bool is_forbidden_offset_for_adrp(void *place)
> }
>
> struct plt_entry get_plt_entry(u64 dst, void *pc);
> +struct plt_entry update_plt_entry(u64 pc, u64 dst, int record);
> bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
>
> static inline bool plt_entry_is_initialized(const struct plt_entry *e)
> diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
> index 88462d925446..71290f0e0740 100644
> --- a/arch/arm64/kernel/entry-ftrace.S
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -154,8 +154,12 @@ SYM_INNER_LABEL(ftrace_regs_caller_tramp_ops, SYM_L_GLOBAL)
> mov x3, sp // regs
> SYM_INNER_LABEL(ftrace_regs_caller_tramp_call, SYM_L_GLOBAL)
> nop
> + nop
> + nop
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> SYM_INNER_LABEL(ftrace_regs_caller_tramp_graph_call, SYM_L_GLOBAL)
> + nop
> + nop
> nop // ftrace_graph_caller()
> #endif
> ftrace_regs_restore
> @@ -175,8 +179,12 @@ SYM_INNER_LABEL(ftrace_caller_tramp_ops, SYM_L_GLOBAL)
> mov x3, sp // regs
> SYM_INNER_LABEL(ftrace_caller_tramp_call, SYM_L_GLOBAL)
> nop
> + nop
> + nop
> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> SYM_INNER_LABEL(ftrace_caller_tramp_graph_call, SYM_L_GLOBAL)
> + nop
> + nop
> nop // ftrace_graph_caller()
> #endif
> ftrace_regs_restore
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> index d35a042baf75..4011aceefe8c 100644
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -234,27 +234,55 @@ ftrace_trampoline_get_func(struct ftrace_ops *ops, bool is_graph)
> return func;
> }
>
> +static struct plt_entry *
> +update_ftrace_tramp_plt(unsigned long pc, unsigned long addr, bool enable, bool link)
> +{
> + int i;
> + struct plt_entry entry;
> + u32 *pentry;
> + long offset;
> + struct plt_entry *plt = (struct plt_entry *)pc;
> +
> + offset = (long)pc - (long)addr;
> +
> + if (!enable)
> + entry = update_plt_entry(pc, addr, PLT_MAKE_NOP);
> + else if (offset < -SZ_128M || offset >= SZ_128M) {
> + if (link)
> + entry = update_plt_entry(pc, addr, PLT_MAKE_LCALL_LINK);
> + else
> + entry = update_plt_entry(pc, addr, PLT_MAKE_LCALL);
> + } else {
> + if (link)
> + entry = update_plt_entry(pc, addr, PLT_MAKE_CALL_LINK);
> + else
> + entry = update_plt_entry(pc, addr, PLT_MAKE_CALL);
> + }
> +
> + pentry = (u32 *)&entry;
> + for (i = 0; i < sizeof(struct plt_entry) / AARCH64_INSN_SIZE; i++)
> + aarch64_insn_patch_text_nosync((void *)((u32*)pc + i), *(pentry + i));
> +
> + return plt;
> +}
> +
> static int
> ftrace_trampoline_modify_call(struct ftrace_ops *ops, bool is_graph, bool active)
> {
> unsigned long offset;
> unsigned long pc;
> ftrace_func_t func;
> - u32 nop, branch;
>
> offset = calc_trampoline_call_offset(ops->flags &
> FTRACE_OPS_FL_SAVE_REGS, is_graph);
> pc = ops->trampoline + offset;
>
> func = ftrace_trampoline_get_func(ops, is_graph);
> - nop = aarch64_insn_gen_nop();
> - branch = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
> - AARCH64_INSN_BRANCH_LINK);
>
> - if (active)
> - return ftrace_modify_code(pc, 0, branch, false);
> - else
> - return ftrace_modify_code(pc, 0, nop, false);
> + if (update_ftrace_tramp_plt(pc, (unsigned long)func, active, true))
> + return 0;
> +
> + return -EINVAL;
> }
>
> extern int ftrace_graph_active;
> @@ -262,13 +290,6 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
> {
> unsigned int size;
>
> - /*
> - * If kaslr is enabled, the address of tramp and ftrace call
> - * may be far away, Therefore, long jump support is required.
> - */
> - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
> - return;
> -
> if (!ops->trampoline) {
> ops->trampoline = create_trampoline(ops, &size);
> if (!ops->trampoline)
> @@ -349,17 +370,34 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
> return ftrace_modify_code(pc, 0, new, false);
> }
>
> -static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
> +extern void ftrace_kernel_plt(void);
> +static struct plt_entry *
> +get_ftrace_plt(struct module *mod, unsigned long pc, unsigned long addr)
> {
> -#ifdef CONFIG_ARM64_MODULE_PLTS
> - struct plt_entry *plt = mod->arch.ftrace_trampolines;
> + struct plt_entry *plt;
>
> - if (addr == FTRACE_ADDR)
> - return &plt[FTRACE_PLT_IDX];
> - if (addr == FTRACE_REGS_ADDR &&
> - IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
> - return &plt[FTRACE_REGS_PLT_IDX];
> + if (mod) {
> +#ifdef CONFIG_ARM64_MODULE_PLTS
> + plt = mod->arch.ftrace_trampolines;
> +
> + if (addr == FTRACE_ADDR)
> + return &plt[FTRACE_PLT_IDX];
> + else if (addr == FTRACE_REGS_ADDR &&
> + IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
> + return &plt[FTRACE_REGS_PLT_IDX];
> + else {
> #endif
> + pc = pc - sizeof(struct plt_entry) - AARCH64_INSN_SIZE;
> + return update_ftrace_tramp_plt(pc, addr, true, false);
> +#ifdef CONFIG_ARM64_MODULE_PLTS
> + }
> +#endif
> + } else {
> + WARN_ON(addr == FTRACE_ADDR || addr == FTRACE_REGS_ADDR);
> + pc = pc - sizeof(struct plt_entry) - AARCH64_INSN_SIZE;
> + return update_ftrace_tramp_plt(pc, addr, true, false);
> + }
> +
> return NULL;
> }
>
> @@ -376,9 +414,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> struct module *mod;
> struct plt_entry *plt;
>
> - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
> - return -EINVAL;
> -
> /*
> * On kernels that support module PLTs, the offset between the
> * branch instruction and its target may legally exceed the
> @@ -393,12 +428,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> mod = __module_text_address(pc);
> preempt_enable();
>
> - if (WARN_ON(!mod))
> - return -EINVAL;
> -
> - plt = get_ftrace_plt(mod, addr);
> + plt = get_ftrace_plt(mod, pc, addr);
> if (!plt) {
> - pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
> + pr_err("ftrace: no %s PLT for %ps\n",
> + mod ? "module" : "kernel", (void *)addr);
> return -EINVAL;
> }
>
> @@ -474,36 +507,18 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
> if (offset < -SZ_128M || offset >= SZ_128M) {
> u32 replaced;
>
> - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
> - return -EINVAL;
> -
> - /*
> - * 'mod' is only set at module load time, but if we end up
> - * dealing with an out-of-range condition, we can assume it
> - * is due to a module being loaded far away from the kernel.
> - */
> - if (!mod) {
> - preempt_disable();
> - mod = __module_text_address(pc);
> - preempt_enable();
> -
> - if (WARN_ON(!mod))
> - return -EINVAL;
> - }
> -
> /*
> - * The instruction we are about to patch may be a branch and
> - * link instruction that was redirected via a PLT entry. In
> - * this case, the normal validation will fail, but we can at
> - * least check that we are dealing with a branch and link
> - * instruction that points into the right module.
> + * The instruction we are about to patch may be a branch
> + * and link instruction that was redirected via a PLT entry
> + * supported by a dynamic trampoline or module, in this case,
> + * the normal validation will fail, but we can at least check
> + * that we are dealing with a branch and link instruction
> + * that points into the right place.
> */
> if (aarch64_insn_read((void *)pc, &replaced))
> return -EFAULT;
>
> - if (!aarch64_insn_is_bl(replaced) ||
> - !within_module(pc + aarch64_get_branch_offset(replaced),
> - mod))
> + if (!aarch64_insn_is_bl(replaced))
> return -EINVAL;
>
> validate = false;
> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
> index e53493d8b208..b7a832dfaa69 100644
> --- a/arch/arm64/kernel/module-plts.c
> +++ b/arch/arm64/kernel/module-plts.c
> @@ -37,6 +37,56 @@ struct plt_entry get_plt_entry(u64 dst, void *pc)
> return plt;
> }
>
> +struct plt_entry update_plt_entry(u64 pc, u64 dst, int record)
> +{
> + u32 adrp, add, br;
> +
> + switch (record) {
> + case PLT_MAKE_NOP:
> + adrp = aarch64_insn_gen_nop();
> + add = aarch64_insn_gen_nop();
> + br = aarch64_insn_gen_nop();
> + break;
> +
> + case PLT_MAKE_CALL_LINK:
> + adrp = aarch64_insn_gen_nop();
> + add = aarch64_insn_gen_nop();
> + br = aarch64_insn_gen_branch_imm(pc + 8, dst,
> + AARCH64_INSN_BRANCH_LINK);
> + break;
> +
> + case PLT_MAKE_LCALL_LINK:
> + adrp = aarch64_insn_gen_adr(pc, dst, AARCH64_INSN_REG_16,
> + AARCH64_INSN_ADR_TYPE_ADRP);
> + add = aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_16,
> + AARCH64_INSN_REG_16, dst % SZ_4K,
> + AARCH64_INSN_VARIANT_64BIT,
> + AARCH64_INSN_ADSB_ADD);
> + br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
> + AARCH64_INSN_BRANCH_LINK);
> + break;
> +
> + case PLT_MAKE_LCALL:
> + adrp = aarch64_insn_gen_adr(pc, dst, AARCH64_INSN_REG_16,
> + AARCH64_INSN_ADR_TYPE_ADRP);
> + add = aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_16,
> + AARCH64_INSN_REG_16,
> + dst % SZ_4K,
> + AARCH64_INSN_VARIANT_64BIT,
> + AARCH64_INSN_ADSB_ADD);
> + br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
> + AARCH64_INSN_BRANCH_NOLINK);
> + break;
> + default:
> + pr_err("[%s %d] error flag %d\n", __func__, __LINE__, record);
> + BUG();
> + break;
> + }
> +
> + return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add),
> + cpu_to_le32(br) };
> +}
> +
> bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
> {
> u64 p, q;
> --
> 2.25.1
>