lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <75dbeff6-fd0c-aec1-5821-919088eda6c4@efficios.com>
Date:   Fri, 24 Mar 2023 08:33:35 -0400
From:   Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     linux-kernel@...r.kernel.org, Mark Rutland <mark.rutland@....com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        "Jose E . Marchesi" <jose.marchesi@...cle.com>,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        Sami Tolvanen <samitolvanen@...gle.com>,
        Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH v3] tracepoint: Fix CFI failures with tp_sub_func

On 2023-03-24 08:31, Mathieu Desnoyers wrote:
> When CLANG_CFI is in use, using tracing will occasionally result in
> CFI failures, e.g.
> 
> | CFI failure at syscall_trace_enter+0x66c/0x7d0 (target: tp_stub_func+0x0/0x2c; expected type: 0x4877830c)
> | Internal error: Oops - CFI: 00000000f200823a [#1] PREEMPT SMP
> | CPU: 2 PID: 118 Comm: klogd Not tainted 6.3.0-rc3-00002-gc242ea6f2f98 #2
> | Hardware name: linux,dummy-virt (DT)
> | pstate: 30400005 (nzCV daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> | pc : syscall_trace_enter+0x66c/0x7d0
> | lr : syscall_trace_enter+0x5e8/0x7d0
> | sp : ffff800015ce7d80
> | x29: ffff800015ce7d80 x28: ffff000017538000 x27: 0000000000000003
> | x26: ffff8000084c9454 x25: ffff00001182bd10 x24: dfff800000000000
> | x23: 1fffe00002ea7001 x22: ffff00001182bd10 x21: ffff000017538008
> | x20: ffff000017538000 x19: ffff800015ce7eb0 x18: 0000000000000000
> | x17: 000000004877830c x16: 00000000a540670c x15: 0000000000000000
> | x14: 0000000000000000 x13: 0000000000000000 x12: ff80800008039d8c
> | x11: ff80800008039dd0 x10: 0000000000000000 x9 : 0000000000000000
> | x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
> | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
> | x2 : 00000000000000ce x1 : ffff800015ce7eb0 x0 : ffff800013d55000
> | Call trace:
> |  syscall_trace_enter+0x66c/0x7d0
> |  el0_svc_common+0x1dc/0x268
> |  do_el0_svc+0x70/0x1a4
> |  el0_svc+0x58/0x14c
> |  el0t_64_sync_handler+0x84/0xf0
> |  el0t_64_sync+0x190/0x194
> | Code: 72906191 72a90ef1 6b11021f 54000040 (d4304740)
> | ---[ end trace 0000000000000000 ]---
> 
> This happens because the function prototype of tp_sub_func doesn't match
> the prototype of the tracepoint function. As each tracepoint may have a
> distinct prototype, it's not possible to share a common stub function.
> 
> Avoid this by comparing the tracepoint function pointer to the value 1
> before calling each function.
> 
> Prior to this change, it_func_ptr->func was loaded twice per loop
> iteration:
> 
>      it_func = READ_ONCE((it_func_ptr)->func);
> [...]
>    } while ((++it_func_ptr)->func);
> 
> With this change, the loaded it_func is used for comparisons with 0 (end
> of loop), 1 (skip) and as a function pointer, thus saving a load per
> loop.
> 
> The comparison with <= TRACEPOINT_FUNC_SKIP is done on purpose to branch
> over both comparisons with 0 and 1 with a single branch for each loop
> and an extra conditional branch on the last loop to compare with 0.
> 
> A simplified example of this compiled on godbolt.org:
> 
>    #include <stdint.h>
> 
>    typedef void (*f_t)(void);
>    volatile f_t *arr;
> 
>    void fct(void)
>    {
>        volatile f_t *iter = &arr[0];
> 
>        for (;;) {
>            f_t l_iter = *iter;
>            if (((uintptr_t)l_iter <= 0x1)) {
>                if ((uintptr_t)l_iter == 0x1)
>                    continue;
>                else
>                    break;
>            }
>            l_iter();
>            iter++;
>        }
>    }
> 
> Generates:
> 
> x86-64 gcc 12.2 -O2:
> 
>    fct:
>            push    rbx
>            mov     rbx, QWORD PTR arr[rip]
>    .L29:
>            mov     rax, QWORD PTR [rbx]
>            cmp     rax, 1
>            jbe     .L38
>    .L30:
>            call    rax
>            mov     rax, QWORD PTR [rbx+8]
>            add     rbx, 8
>            cmp     rax, 1
>            ja      .L30
>    .L38:
>            je      .L29
>            pop     rbx
>            ret
> 
> clang 16.0.0 -O2:
> 
>    fct:                                    # @fct
>            push    rbx
>            mov     rbx, qword ptr [rip + arr]
>    .LBB0_1:                                # =>This Inner Loop Header: Depth=1
>            mov     rax, qword ptr [rbx]
>            cmp     rax, 1
>            ja      .LBB0_4
>            je      .LBB0_1
>            jmp     .LBB0_3
>    .LBB0_4:                                #   in Loop: Header=BB0_1 Depth=1
>            call    rax
>            add     rbx, 8
>            jmp     .LBB0_1
>    .LBB0_3:
>            pop     rbx
>            ret
>    arr:
>            .quad   0
> 
> On x86, the result of the "cmp" instruction is used to conditionally
> branch based on both inequality (jbe, ja) and equality (je), thus
> keeping the code size small and limiting the number of instructions to
> execute on this tracepoint instrumentation fast path.
> 
> Reported-by: Mark Rutland <mark.rutland@....com>
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
> Tested-by: Mark Rutland <mark.rutlnand@....com>

It appears there was a typo in this Tested-by address from Mark. Steven, 
should I resend or can you correct this as you pick it up ?

Thanks,

Mathieu

> Acked-by: Mark Rutland <mark.rutland@....com>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Mark Rutland <mark.rutland@....com>
> Cc: Masami Hiramatsu <mhiramat@...nel.org>
> Cc: Jose E. Marchesi <jose.marchesi@...cle.com>
> Cc: Nick Desaulniers <ndesaulniers@...gle.com>
> Cc: Sami Tolvanen <samitolvanen@...gle.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> ---
> Changes since v2:
> - Fix typo in commit message.
> - Add acked-by, tested-by.
> ---
>   include/linux/tracepoint.h | 14 ++++++++++++--
>   kernel/tracepoint.c        | 20 +++++++-------------
>   2 files changed, 19 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 4b33b95eb8be..0aeac249d412 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -33,6 +33,9 @@ struct trace_eval_map {
>   
>   #define TRACEPOINT_DEFAULT_PRIO	10
>   
> +/* Reserved value for tracepoint callback. */
> +#define TRACEPOINT_FUNC_SKIP	((void *) 0x1)
> +
>   extern struct srcu_struct tracepoint_srcu;
>   
>   extern int
> @@ -314,11 +317,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>   		it_func_ptr =						\
>   			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
>   		if (it_func_ptr) {					\
> -			do {						\
> +			for (;;) {					\
>   				it_func = READ_ONCE((it_func_ptr)->func); \
> +				if ((uintptr_t) it_func <= (uintptr_t) TRACEPOINT_FUNC_SKIP) { \
> +					if (it_func == TRACEPOINT_FUNC_SKIP) \
> +						continue;		\
> +					else				\
> +						break;			\
> +				}					\
>   				__data = (it_func_ptr)->data;		\
>   				((void(*)(void *, proto))(it_func))(__data, args); \
> -			} while ((++it_func_ptr)->func);		\
> +				it_func_ptr++;				\
> +			}						\
>   		}							\
>   		return 0;						\
>   	}								\
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index f23144af5743..2fa108ddbbc2 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -98,12 +98,6 @@ struct tp_probes {
>   	struct tracepoint_func probes[];
>   };
>   
> -/* Called in removal of a func but failed to allocate a new tp_funcs */
> -static void tp_stub_func(void)
> -{
> -	return;
> -}
> -
>   static inline void *allocate_probes(int count)
>   {
>   	struct tp_probes *p  = kmalloc(struct_size(p, probes, count),
> @@ -193,8 +187,8 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
>   	if (old) {
>   		/* (N -> N+1), (N != 0, 1) probes */
>   		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
> -			if (old[iter_probes].func == tp_stub_func)
> -				continue;	/* Skip stub functions. */
> +			if (old[iter_probes].func == TRACEPOINT_FUNC_SKIP)
> +				continue;
>   			if (old[iter_probes].func == tp_func->func &&
>   			    old[iter_probes].data == tp_func->data)
>   				return ERR_PTR(-EEXIST);
> @@ -208,7 +202,7 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
>   	if (old) {
>   		nr_probes = 0;
>   		for (iter_probes = 0; old[iter_probes].func; iter_probes++) {
> -			if (old[iter_probes].func == tp_stub_func)
> +			if (old[iter_probes].func == TRACEPOINT_FUNC_SKIP)
>   				continue;
>   			/* Insert before probes of lower priority */
>   			if (pos < 0 && old[iter_probes].prio < prio)
> @@ -246,7 +240,7 @@ static void *func_remove(struct tracepoint_func **funcs,
>   		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
>   			if ((old[nr_probes].func == tp_func->func &&
>   			     old[nr_probes].data == tp_func->data) ||
> -			    old[nr_probes].func == tp_stub_func)
> +			    old[nr_probes].func == TRACEPOINT_FUNC_SKIP)
>   				nr_del++;
>   		}
>   	}
> @@ -269,7 +263,7 @@ static void *func_remove(struct tracepoint_func **funcs,
>   			for (i = 0; old[i].func; i++) {
>   				if ((old[i].func != tp_func->func ||
>   				     old[i].data != tp_func->data) &&
> -				    old[i].func != tp_stub_func)
> +				    old[i].func != TRACEPOINT_FUNC_SKIP)
>   					new[j++] = old[i];
>   			}
>   			new[nr_probes - nr_del].func = NULL;
> @@ -277,12 +271,12 @@ static void *func_remove(struct tracepoint_func **funcs,
>   		} else {
>   			/*
>   			 * Failed to allocate, replace the old function
> -			 * with calls to tp_stub_func.
> +			 * with TRACEPOINT_FUNC_SKIP.
>   			 */
>   			for (i = 0; old[i].func; i++) {
>   				if (old[i].func == tp_func->func &&
>   				    old[i].data == tp_func->data)
> -					WRITE_ONCE(old[i].func, tp_stub_func);
> +					WRITE_ONCE(old[i].func, TRACEPOINT_FUNC_SKIP);
>   			}
>   			*funcs = old;
>   		}

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ