lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAKwvOdncQM4i=Xm=fP2teY0qsKfUnL2Tgts-0GjZGH4nVx_JaQ@mail.gmail.com>
Date:   Mon, 12 Aug 2019 16:49:54 -0700
From:   Nick Desaulniers <ndesaulniers@...gle.com>
To:     Nathan Huckleberry <nhuck@...gle.com>
Cc:     Russell King <linux@...linux.org.uk>,
        Linux ARM <linux-arm-kernel@...ts.infradead.org>,
        LKML <linux-kernel@...r.kernel.org>,
        clang-built-linux <clang-built-linux@...glegroups.com>,
        Tri Vo <trong@...gle.com>
Subject: Re: [RFC PATCH] ARM: UNWINDER_FRAME_POINTER implementation for Clang

On Thu, Aug 1, 2019 at 4:10 PM 'Nathan Huckleberry' via Clang Built
Linux <clang-built-linux@...glegroups.com> wrote:
>
> The stackframe setup when compiled with clang is different.
> Since the stack unwinder expects the gcc stackframe setup it
> fails to print backtraces. This patch adds support for the
> clang stackframe setup.
>
> Cc: clang-built-linux@...glegroups.com
> Suggested-by: Tri Vo <trong@...gle.com>
> Signed-off-by: Nathan Huckleberry <nhuck@...gle.com>

Thanks for the patch! This is something definitely useful to have
implemented with Clang.  Some initial thoughts below:

> ---
>  arch/arm/Kconfig.debug   |   4 +-
>  arch/arm/Makefile        |   2 +-
>  arch/arm/lib/backtrace.S | 134 ++++++++++++++++++++++++++++++++++++---
>  3 files changed, 128 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
> index 85710e078afb..92fca7463e21 100644
> --- a/arch/arm/Kconfig.debug
> +++ b/arch/arm/Kconfig.debug
> @@ -56,7 +56,7 @@ choice
>
>  config UNWINDER_FRAME_POINTER
>         bool "Frame pointer unwinder"
> -       depends on !THUMB2_KERNEL && !CC_IS_CLANG
> +       depends on !THUMB2_KERNEL
>         select ARCH_WANT_FRAME_POINTERS
>         select FRAME_POINTER
>         help
> @@ -1872,7 +1872,7 @@ config DEBUG_UNCOMPRESS
>           When this option is set, the selected DEBUG_LL output method
>           will be re-used for normal decompressor output on multiplatform
>           kernels.
> -
> +
>
>  config UNCOMPRESS_INCLUDE
>         string
> diff --git a/arch/arm/Makefile b/arch/arm/Makefile
> index c3624ca6c0bc..a593d9c4e18a 100644
> --- a/arch/arm/Makefile
> +++ b/arch/arm/Makefile
> @@ -36,7 +36,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-unaligned-access)
>  endif
>
>  ifeq ($(CONFIG_FRAME_POINTER),y)
> -KBUILD_CFLAGS  +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog
> +KBUILD_CFLAGS  +=-fno-omit-frame-pointer $(call cc-option,-mapcs,) $(call cc-option,-mno-sched-prolog,)
>  endif
>
>  ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
> diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
> index 1d5210eb4776..fd64eec9f6ae 100644
> --- a/arch/arm/lib/backtrace.S
> +++ b/arch/arm/lib/backtrace.S
> @@ -14,10 +14,7 @@
>  @ fp is 0 or stack frame
>
>  #define frame  r4
> -#define sv_fp  r5
> -#define sv_pc  r6

It looks like sv_fp and sv_pc have the same values for both GCC and
Clang, maybe they don't need to be moved here?

>  #define mask   r7
> -#define offset r8

So GCC has an offset while Clang has sv_lr.

>
>  ENTRY(c_backtrace)
>
> @@ -25,7 +22,8 @@ ENTRY(c_backtrace)
>                 ret     lr
>  ENDPROC(c_backtrace)
>  #else
> -               stmfd   sp!, {r4 - r8, lr}      @ Save an extra register so we have a location...
> +               stmfd   sp!, {r4 - r8, fp, lr}  @ Save an extra register

Not having a preprocessor guard here makes it seem like GCC will
always have to move the additional register, even if it's not using
it?

> +                                               @ so we have a location..
>                 movs    frame, r0               @ if frame pointer is zero
>                 beq     no_frame                @ we have no stack frames
>
> @@ -35,11 +33,119 @@ ENDPROC(c_backtrace)
>   THUMB(                orreq   mask, #0x03             )
>                 movne   mask, #0                @ mask for 32-bit
>
> -1:             stmfd   sp!, {pc}               @ calculate offset of PC stored
> -               ldr     r0, [sp], #4            @ by stmfd for this CPU
> -               adr     r1, 1b
> -               sub     offset, r0, r1
>
> +#if defined(CONFIG_CC_IS_CLANG)

#ifdef CONFIG_CC_IS_CLANG

I'd only use `#if defined(foo)` if there were 2 or more things I
needed to check: `#if defined(foo) || defined(bar)`.

> +/*
> + * Clang does not store pc or sp in function prologues
> + *             so we don't know exactly where the function
> + *             starts.
> + * We can treat the current frame's lr as the saved pc and the
> + *             preceding frame's lr as the lr, but we can't
> + *             trace the most recent call.
> + * Inserting a false stack frame allows us to reference the
> + *             function called last in the stacktrace.
> + * If the call instruction was a bl we can look at the callers
> + *             branch instruction to calculate the saved pc.
> + * We can recover the pc in most cases, but in cases such as
> + *             calling function pointers we cannot. In this
> + *             case, default to using the lr. This will be
> + *             some address in the function, but will not
> + *             be the function start.
> + * Unfortunately due to the stack frame layout we can't dump
> + *              r0 - r3, but these are less frequently saved.
> + *
> + * Stack frame layout:
> + *             <larger addresses>
> + *             saved lr
> + *    frame => saved fp
> + *             optionally saved caller registers (r4 - r10)
> + *             optionally saved arguments (r0 - r3)
> + *             <top of stack frame>
> + *             <smaller addressses>

s/addressses/addresses/

> + *
> + * Functions start with the following code sequence:
> + * corrected pc =>  stmfd sp!, {..., fp, lr}
> + *                 add fp, sp, #x
> + *                 stmfd sp!, {r0 - r3} (optional)
> + */
> +#define sv_fp  r5
> +#define sv_pc  r6
> +#define sv_lr   r8
> +               add     frame, sp, #20          @ switch to false frame
> +for_each_frame:        tst     frame, mask             @ Check for address exceptions
> +               bne     no_frame
> +
> +1001:          ldr     sv_pc, [frame, #4]      @ get saved 'pc'
> +1002:          ldr     sv_fp, [frame, #0]      @ get saved fp

These two sections seem to differ between GCC and Clang only for the
offsets. Could these be made into preprocessor defines and more code
shared?

> +
> +               teq     sv_fp, #0               @ make sure next frame exists
> +               beq     no_frame
> +
> +1003:          ldr     sv_lr, [sv_fp, #4]      @ get saved lr from next frame
> +
> +               //try to find function start

Use either /* c89 comments */ or @arm assembler comments.

> +               ldr     r0, [sv_lr, #-4]        @ get call instruction
> +               ldr     r3, .Ldsi+8
> +               and     r2, r3, r0              @ is this a bl call
> +               teq     r2, r3
> +               bne     finished_setup          @ give up if it's not
> +               and     r0, #0xffffff           @ get call offset 24-bit int
> +               lsl     r0, r0, #8              @ sign extend offset
> +               asr     r0, r0, #8
> +               ldr     sv_pc, [sv_fp, #4]      @ get lr address
> +               add     sv_pc, sv_pc, #-4       @ get call instruction address
> +               add     sv_pc, sv_pc, #8        @ take care of prefetch
> +               add     sv_pc, sv_pc, r0, lsl #2 @ find function start
> +               b       finished_setup

Do we need an explicit branch to this label? Wouldn't we just fall
through to it?j

> +
> +finished_setup:
> +
> +               bic     sv_pc, sv_pc, mask      @ mask PC/LR for the mode
> +
> +1004:          mov     r0, sv_pc
> +
> +               mov     r1, sv_lr
> +               mov     r2, frame
> +               bic     r1, r1, mask            @ mask PC/LR for the mode
> +               bl      dump_backtrace_entry
> +
> +1005:          ldr     r1, [sv_pc, #0]         @ if stmfd sp!, {..., fp, lr}
> +               ldr     r3, .Ldsi               @ instruction exists,
> +               teq     r3, r1, lsr #11
> +               ldr     r0, [frame]             @ locals are stored in
> +                                               @ the preceding frame
> +               subeq   r0, r0, #4
> +               bleq    dump_backtrace_stm      @ dump saved registers
> +
> +               teq     sv_fp, #0               @ zero saved fp means
> +               beq     no_frame                @ no further frames
> +
> +               cmp     sv_fp, frame            @ next frame must be
> +               mov     frame, sv_fp            @ above the current frame
> +               bhi     for_each_frame
> +
> +1006:          adr     r0, .Lbad
> +               mov     r1, frame
> +               bl      printk
> +no_frame:      ldmfd   sp!, {r4 - r8, fp, pc}
> +ENDPROC(c_backtrace)
> +               .pushsection __ex_table,"a"
> +               .align  3
> +               .long   1001b, 1006b
> +               .long   1002b, 1006b
> +               .long   1003b, 1006b
> +               .long   1004b, 1006b
> +               .popsection
> +
> +.Lbad:         .asciz  "Backtrace aborted due to bad frame pointer <%p>\n"
> +               .align

Probably don't need to duplicate the above (up to ENDPROC), but the
below hunk looks compiler specific.

> +.Ldsi:         .word   0xe92d4800 >> 11        @ stmfd sp!, {... fp, lr}
> +               .word   0xe92d0000 >> 11        @ stmfd sp!, {}
> +               .word   0x0b000000              @ bl if these bits are set
> +
> +ENDPROC(c_backtrace)

Duplicate ENDPROC?

> +
> +#else
>  /*
>   * Stack frame layout:
>   *             optionally saved caller registers (r4 - r10)
> @@ -55,6 +161,15 @@ ENDPROC(c_backtrace)
>   *                  stmfd sp!, {r0 - r3} (optional)
>   * corrected pc =>  stmfd sp!, {..., fp, ip, lr, pc}
>   */
> +#define sv_fp  r5
> +#define sv_pc  r6
> +#define offset r8
> +
> +1:             stmfd   sp!, {pc}               @ calculate offset of PC stored
> +               ldr     r0, [sp], #4            @ by stmfd for this CPU
> +               adr     r1, 1b
> +               sub     offset, r0, r1
> +
>  for_each_frame:        tst     frame, mask             @ Check for address exceptions
>                 bne     no_frame
>
> @@ -98,7 +213,7 @@ for_each_frame:      tst     frame, mask             @ Check for address exceptions
>  1006:          adr     r0, .Lbad
>                 mov     r1, frame
>                 bl      printk
> -no_frame:      ldmfd   sp!, {r4 - r8, pc}
> +no_frame:      ldmfd   sp!, {r4 - r8, fp, pc}

More work for GCC...

>  ENDPROC(c_backtrace)
>
>                 .pushsection __ex_table,"a"
> @@ -115,3 +230,4 @@ ENDPROC(c_backtrace)
>                 .word   0xe92d0000 >> 11        @ stmfd sp!, {}
>
>  #endif
> +#endif

It would be nice to put comments on the end of these #endif's what
condition they're terminating:

#endif /* CONFIG_CC_IS_CLANG
#endif /* !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) */
Maybe also the #else's above.

Will send more thoughts tomorrow/throughout the week.
-- 
Thanks,
~Nick Desaulniers

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ