[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f890bd0-b883-4bd1-a0f1-4cf78db42857@oracle.com>
Date: Thu, 30 Jan 2025 07:07:32 -0800
From: Indu Bhagat <indu.bhagat@...cle.com>
To: Josh Poimboeuf <jpoimboe@...nel.org>, x86@...nel.org
Cc: Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>, Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
linux-kernel@...r.kernel.org, Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
linux-perf-users@...r.kernel.org, Mark Brown <broonie@...nel.org>,
linux-toolchains@...r.kernel.org, Jordan Rome <jordalgo@...a.com>,
Sam James <sam@...too.org>, linux-trace-kernel@...r.kernel.org,
Andrii Nakryiko <andrii.nakryiko@...il.com>,
Jens Remus <jremus@...ux.ibm.com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Florian Weimer <fweimer@...hat.com>, Andy Lutomirski <luto@...nel.org>,
Masami Hiramatsu <mhiramat@...nel.org>, Weinan Liu <wnliu@...gle.com>
Subject: Re: [PATCH v4 19/39] unwind_user/sframe: Add support for reading
.sframe contents
On 1/21/25 6:31 PM, Josh Poimboeuf wrote:
> In preparation for using sframe to unwind user space stacks, add an
> sframe_find() interface for finding the sframe information associated
> with a given text address.
>
> For performance, use user_read_access_begin() and the corresponding
> unsafe_*() accessors. Note that use of pr_debug() in uaccess-enabled
> regions would break noinstr validation, so there aren't any debug
> messages yet. That will be added in a subsequent commit.
>
> Signed-off-by: Josh Poimboeuf <jpoimboe@...nel.org>
> ---
> include/linux/sframe.h | 5 +
> kernel/unwind/sframe.c | 295 ++++++++++++++++++++++++++++++++++-
> kernel/unwind/sframe_debug.h | 35 +++++
> 3 files changed, 331 insertions(+), 4 deletions(-)
> create mode 100644 kernel/unwind/sframe_debug.h
>
> diff --git a/include/linux/sframe.h b/include/linux/sframe.h
> index ff4b9d1dbd00..2e70085a1e89 100644
> --- a/include/linux/sframe.h
> +++ b/include/linux/sframe.h
> @@ -3,11 +3,14 @@
> #define _LINUX_SFRAME_H
>
> #include <linux/mm_types.h>
> +#include <linux/srcu.h>
> #include <linux/unwind_user_types.h>
>
> #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
>
> struct sframe_section {
> + struct rcu_head rcu;
> +
> unsigned long sframe_start;
> unsigned long sframe_end;
> unsigned long text_start;
> @@ -28,6 +31,7 @@ extern void sframe_free_mm(struct mm_struct *mm);
> extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
> unsigned long text_start, unsigned long text_end);
> extern int sframe_remove_section(unsigned long sframe_addr);
> +extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame);
>
> static inline bool current_has_sframe(void)
> {
> @@ -42,6 +46,7 @@ static inline bool current_has_sframe(void)
> static inline void sframe_free_mm(struct mm_struct *mm) {}
> static inline int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, unsigned long text_start, unsigned long text_end) { return -ENOSYS; }
> static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; }
> +static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; }
> static inline bool current_has_sframe(void) { return false; }
>
> #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
> diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
> index fa7d87ffd00a..1a35615a361e 100644
> --- a/kernel/unwind/sframe.c
> +++ b/kernel/unwind/sframe.c
> @@ -15,9 +15,287 @@
> #include <linux/unwind_user_types.h>
>
> #include "sframe.h"
> +#include "sframe_debug.h"
>
> -#define dbg(fmt, ...) \
> - pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__)
> +struct sframe_fre {
> + unsigned int size;
> + s32 ip_off;
> + s32 cfa_off;
> + s32 ra_off;
> + s32 fp_off;
> + u8 info;
> +};
> +
> +DEFINE_STATIC_SRCU(sframe_srcu);
> +
> +static __always_inline unsigned char fre_type_to_size(unsigned char fre_type)
> +{
> + if (fre_type > 2)
> + return 0;
> + return 1 << fre_type;
> +}
> +
> +static __always_inline unsigned char offset_size_enum_to_size(unsigned char off_size)
> +{
> + if (off_size > 2)
> + return 0;
> + return 1 << off_size;
> +}
> +
> +static __always_inline int __read_fde(struct sframe_section *sec,
> + unsigned int fde_num,
> + struct sframe_fde *fde)
> +{
> + unsigned long fde_addr, ip;
> +
> + fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde));
> + unsafe_copy_from_user(fde, (void __user *)fde_addr,
> + sizeof(struct sframe_fde), Efault);
> +
> + ip = sec->sframe_start + fde->start_addr;
> + if (ip < sec->text_start || ip > sec->text_end)
> + return -EINVAL;
> +
> + return 0;
> +
> +Efault:
> + return -EFAULT;
> +}
> +
> +static __always_inline int __find_fde(struct sframe_section *sec,
> + unsigned long ip,
> + struct sframe_fde *fde)
> +{
> + s32 ip_off, func_off_low = S32_MIN, func_off_high = S32_MAX;
> + struct sframe_fde __user *first, *low, *high, *found = NULL;
> + int ret;
> +
> + ip_off = ip - sec->sframe_start;
> +
> + first = (void __user *)sec->fdes_start;
> + low = first;
> + high = first + sec->num_fdes - 1;
> +
> + while (low <= high) {
> + struct sframe_fde __user *mid;
> + s32 func_off;
> +
> + mid = low + ((high - low) / 2);
> +
> + unsafe_get_user(func_off, (s32 __user *)mid, Efault);
> +
> + if (ip_off >= func_off) {
> + if (func_off < func_off_low)
> + return -EFAULT;
> +
> + func_off_low = func_off;
> +
> + found = mid;
> + low = mid + 1;
> + } else {
> + if (func_off > func_off_high)
> + return -EFAULT;
> +
> + func_off_high = func_off;
> +
> + high = mid - 1;
> + }
> + }
> +
> + if (!found)
> + return -EINVAL;
> +
> + ret = __read_fde(sec, found - first, fde);
> + if (ret)
> + return ret;
> +
> + /* make sure it's not in a gap */
> + if (ip_off < fde->start_addr || ip_off >= fde->start_addr + fde->func_size)
> + return -EINVAL;
> +
> + return 0;
> +
> +Efault:
> + return -EFAULT;
> +}
> +
> +#define __UNSAFE_GET_USER_INC(to, from, type, label) \
> +({ \
> + type __to; \
> + unsafe_get_user(__to, (type __user *)from, label); \
> + from += sizeof(__to); \
> + to = (typeof(to))__to; \
> +})
> +
> +#define UNSAFE_GET_USER_INC(to, from, size, label) \
> +({ \
> + switch (size) { \
> + case 1: \
> + __UNSAFE_GET_USER_INC(to, from, u8, label); \
> + break; \
> + case 2: \
> + __UNSAFE_GET_USER_INC(to, from, u16, label); \
> + break; \
> + case 4: \
> + __UNSAFE_GET_USER_INC(to, from, u32, label); \
> + break; \
> + default: \
> + return -EFAULT; \
> + } \
> +})
> +
> +static __always_inline int __read_fre(struct sframe_section *sec,
> + struct sframe_fde *fde,
> + unsigned long fre_addr,
> + struct sframe_fre *fre)
> +{
> + unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info);
> + unsigned char fre_type = SFRAME_FUNC_FRE_TYPE(fde->info);
> + unsigned char offset_count, offset_size;
> + s32 ip_off, cfa_off, ra_off, fp_off;
> + unsigned long cur = fre_addr;
> + unsigned char addr_size;
> + u8 info;
> +
> + addr_size = fre_type_to_size(fre_type);
> + if (!addr_size)
> + return -EFAULT;
> +
> + if (fre_addr + addr_size + 1 > sec->fres_end)
> + return -EFAULT;
> +
> + UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
> + if (fde_type == SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size)
> + return -EFAULT;
> +
> + UNSAFE_GET_USER_INC(info, cur, 1, Efault);
> + offset_count = SFRAME_FRE_OFFSET_COUNT(info);
> + offset_size = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info));
> + if (!offset_count || !offset_size)
> + return -EFAULT;
> +
> + if (cur + (offset_count * offset_size) > sec->fres_end)
> + return -EFAULT;
> +
> + fre->size = addr_size + 1 + (offset_count * offset_size);
> +
> + UNSAFE_GET_USER_INC(cfa_off, cur, offset_size, Efault);
> + offset_count--;
> +
> + ra_off = sec->ra_off;
> + if (!ra_off) {
> + if (!offset_count--)
> + return -EFAULT;
> +
> + UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault);
> + }
> +
> + fp_off = sec->fp_off;
> + if (!fp_off && offset_count) {
> + offset_count--;
> + UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault);
> + }
> +
> + if (offset_count)
> + return -EFAULT;
> +
> + fre->ip_off = ip_off;
> + fre->cfa_off = cfa_off;
> + fre->ra_off = ra_off;
> + fre->fp_off = fp_off;
> + fre->info = info;
> +
> + return 0;
> +
> +Efault:
> + return -EFAULT;
> +}
> +
> +static __always_inline int __find_fre(struct sframe_section *sec,
> + struct sframe_fde *fde, unsigned long ip,
> + struct unwind_user_frame *frame)
> +{
> + unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info);
> + struct sframe_fre *fre, *prev_fre = NULL;
> + struct sframe_fre fres[2];
> + unsigned long fre_addr;
> + bool which = false;
> + unsigned int i;
> + s32 ip_off;
> +
> + ip_off = (s32)(ip - sec->sframe_start) - fde->start_addr;
> +
> + if (fde_type == SFRAME_FDE_TYPE_PCMASK)
> + ip_off %= fde->rep_size;
> +
> + fre_addr = sec->fres_start + fde->fres_off;
> +
> + for (i = 0; i < fde->fres_num; i++) {
> + int ret;
> +
> + /*
> + * Alternate between the two fre_addr[] entries for 'fre' and
> + * 'prev_fre'.
> + */
> + fre = which ? fres : fres + 1;
> + which = !which;
> +
> + ret = __read_fre(sec, fde, fre_addr, fre);
> + if (ret)
> + return ret;
> +
It should be possible to read only the ip_off and info from each FRE
here, and defer reading the offsets (as done in __read_fre) until later,
when you actually need them. See below.
We can find the relevant FRE with the following pieces of information:
- ip_off
- fre_size (this will mean we need to read the uint8_t info in the FRE)
> + fre_addr += fre->size;
> +
> + if (prev_fre && fre->ip_off <= prev_fre->ip_off)
> + return -EFAULT;
> +
> + if (fre->ip_off > ip_off)
> + break;
> +
> + prev_fre = fre;
> + }
> +
> + if (!prev_fre)
> + return -EINVAL;
> + fre = prev_fre;
> +
(Invoke __read_fre here, as we now know that this FRE is the one we are
looking for.)
> + frame->cfa_off = fre->cfa_off;
> + frame->ra_off = fre->ra_off;
> + frame->fp_off = fre->fp_off;
> + frame->use_fp = SFRAME_FRE_CFA_BASE_REG_ID(fre->info) == SFRAME_BASE_REG_FP;
> +
> + return 0;
> +}
Powered by blists - more mailing lists