lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <24e46e1c-e9fa-4d44-97f2-068bda6e54b4@oracle.com>
Date:   Thu, 9 Nov 2023 11:31:59 -0800
From:   Indu Bhagat <indu.bhagat@...cle.com>
To:     Josh Poimboeuf <jpoimboe@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ingo Molnar <mingo@...nel.org>,
        Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org,
        Mark Rutland <mark.rutland@....com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...nel.org>,
        Namhyung Kim <namhyung@...nel.org>,
        Ian Rogers <irogers@...gle.com>,
        Adrian Hunter <adrian.hunter@...el.com>,
        linux-perf-users@...r.kernel.org, Mark Brown <broonie@...nel.org>,
        linux-toolchains@...r.kernel.org
Subject: Re: [PATCH RFC 09/10] unwind: Introduce SFrame user space unwinding

On 11/8/23 16:41, Josh Poimboeuf wrote:
> Some distros have started compiling frame pointers into all their
> packages to enable the kernel to do system-wide profiling of user space.
> Unfortunately that creates a runtime performance penalty across the
> entire system.  Using DWARF instead isn't feasible due to complexity and
> slowness.
> 
> For in-kernel unwinding we solved this problem with the creation of the
> ORC unwinder for x86_64.  Similarly, for user space the GNU assembler
> has created the SFrame format starting with binutils 2.40.  SFrame is a
> simpler version of .eh_frame which gets placed in the .sframe section.
> 
> Add support for unwinding user space using SFrame.
> 
> More information about SFrame can be found here:
> 
>    - https://lwn.net/Articles/932209/
>    - https://lwn.net/Articles/940686/
>    - https://sourceware.org/binutils/docs/sframe-spec.html
> 
> Signed-off-by: Josh Poimboeuf <jpoimboe@...nel.org>
> ---
>   arch/Kconfig                |   3 +
>   arch/x86/include/asm/mmu.h  |   2 +-
>   fs/binfmt_elf.c             |  46 +++-
>   include/linux/mm_types.h    |   3 +
>   include/linux/sframe.h      |  46 ++++
>   include/linux/user_unwind.h |   1 +
>   include/uapi/linux/elf.h    |   1 +
>   include/uapi/linux/prctl.h  |   3 +
>   kernel/fork.c               |  10 +
>   kernel/sys.c                |  11 +
>   kernel/unwind/Makefile      |   1 +
>   kernel/unwind/sframe.c      | 414 ++++++++++++++++++++++++++++++++++++
>   kernel/unwind/sframe.h      | 217 +++++++++++++++++++
>   kernel/unwind/user.c        |  15 +-
>   mm/init-mm.c                |   2 +
>   15 files changed, 768 insertions(+), 7 deletions(-)
>   create mode 100644 include/linux/sframe.h
>   create mode 100644 kernel/unwind/sframe.c
>   create mode 100644 kernel/unwind/sframe.h
> 

Thanks for working on this.

> diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
> new file mode 100644
> index 000000000000..b167c19497e5
> --- /dev/null
> +++ b/kernel/unwind/sframe.c
> @@ -0,0 +1,414 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/srcu.h>
> +#include <linux/uaccess.h>
> +#include <linux/mm.h>
> +#include <linux/sframe.h>
> +#include <linux/user_unwind.h>
> +
> +#include "sframe.h"
> +
> +#define SFRAME_FILENAME_LEN 32
> +
> +struct sframe_section {
> +	struct rcu_head rcu;
> +
> +	unsigned long sframe_addr;
> +	unsigned long text_addr;
> +
> +	unsigned long fdes_addr;
> +	unsigned long fres_addr;
> +	unsigned int  fdes_num;
> +	signed char ra_off, fp_off;
> +};
> +
> +DEFINE_STATIC_SRCU(sframe_srcu);
> +
> +#define __SFRAME_GET_USER(out, user_ptr, type)				\
> +({									\
> +	type __tmp;							\
> +	if (get_user(__tmp, (type *)user_ptr))				\
> +		return -EFAULT;						\
> +	user_ptr += sizeof(__tmp);					\
> +	out = __tmp;							\
> +})
> +
> +#define SFRAME_GET_USER_SIGNED(out, user_ptr, size)			\
> +({									\
> +	switch (size) {							\
> +	case 1:								\
> +		__SFRAME_GET_USER(out, user_ptr, s8);			\
> +		break;							\
> +	case 2:								\
> +		__SFRAME_GET_USER(out, user_ptr, s16);			\
> +		break;							\
> +	case 4:								\
> +		__SFRAME_GET_USER(out, user_ptr, s32);			\
> +		break;							\
> +	default:							\
> +		return -EINVAL;						\
> +	}								\
> +})
> +
> +#define SFRAME_GET_USER_UNSIGNED(out, user_ptr, size)			\
> +({									\
> +	switch (size) {							\
> +	case 1:								\
> +		__SFRAME_GET_USER(out, user_ptr, u8);			\
> +		break;							\
> +	case 2:								\
> +		__SFRAME_GET_USER(out, user_ptr, u16);			\
> +		break;							\
> +	case 4:								\
> +		__SFRAME_GET_USER(out, user_ptr, u32);			\
> +		break;							\
> +	default:							\
> +		return -EINVAL;						\
> +	}								\
> +})
> +
> +static unsigned char fre_type_to_size(unsigned char fre_type)
> +{
> +	if (fre_type > 2)
> +		return 0;
> +	return 1 << fre_type;
> +}
> +
> +static unsigned char offset_size_enum_to_size(unsigned char off_size)
> +{
> +	if (off_size > 2)
> +		return 0;
> +	return 1 << off_size;
> +}
> +
> +static int find_fde(struct sframe_section *sec, unsigned long ip,
> +		    struct sframe_fde *fde)
> +{
> +	s32 func_off, ip_off;
> +	struct sframe_fde __user *first, *last, *mid, *found;
> +
> +	ip_off = ip - sec->sframe_addr;
> +
> +	first = (void *)sec->fdes_addr;
> +	last = first + sec->fdes_num;
> +	while (first <= last) {
> +		mid = first + ((last - first) / 2);
> +		if (get_user(func_off, (s32 *)mid))
> +			return -EFAULT;
> +		if (ip_off >= func_off) {
> +			found = mid;
> +			first = mid + 1;
> +		} else
> +			last = mid - 1;
> +	}
> +
> +	if (!found)
> +		return -EINVAL;
> +
> +	if (copy_from_user(fde, found, sizeof(*fde)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static int find_fre(struct sframe_section *sec, struct sframe_fde *fde,
> +		    unsigned long ip, struct user_unwind_frame *frame)
> +{
> +	unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info);
> +	unsigned char fre_type = SFRAME_FUNC_FRE_TYPE(fde->info);
> +	s32 fre_ip_off, cfa_off, ra_off, fp_off, ip_off;
> +	unsigned char offset_count, offset_size;
> +	unsigned char addr_size;
> +	void __user *f, *last_f;
> +	u8 fre_info;
> +	int i;
> +
> +	addr_size = fre_type_to_size(fre_type);
> +	if (!addr_size)
> +		return -EINVAL;
> +
> +	ip_off = ip - sec->sframe_addr - fde->start_addr;
> +
> +	f = (void *)sec->fres_addr + fde->fres_off;
> +
> +	for (i = 0; i < fde->fres_num; i++) {
> +
> +		SFRAME_GET_USER_UNSIGNED(fre_ip_off, f, addr_size);
> +
> +		if (fde_type == SFRAME_FDE_TYPE_PCINC) {
> +			if (fre_ip_off > ip_off)
> +				break;
> +		} else {
> +			/* SFRAME_FDE_TYPE_PCMASK */
> +#if 0 /* sframe v2 */
> +			if (ip_off % fde->rep_size < fre_ip_off)
> +				break;
> +#endif
> +		}
> +
> +		SFRAME_GET_USER_UNSIGNED(fre_info, f, 1);
> +
> +		offset_count = SFRAME_FRE_OFFSET_COUNT(fre_info);
> +		offset_size  = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(fre_info));
> +
> +		if (!offset_count || !offset_size)
> +			return -EINVAL;
> +
> +		last_f = f;
> +		f += offset_count * offset_size;
> +	}
> +
> +	if (!last_f)
> +		return -EINVAL;
> +
> +	f = last_f;
> +
> +	SFRAME_GET_USER_UNSIGNED(cfa_off, f, offset_size);
> +	offset_count--;
> +
> +	ra_off = sec->ra_off;
> +	if (!ra_off) {
> +		if (!offset_count--)
> +			return -EINVAL;
> +		SFRAME_GET_USER_SIGNED(ra_off, f, offset_size);
> +	}
> +
> +	fp_off = sec->fp_off;
> +	if (!fp_off && offset_count) {
> +		offset_count--;
> +		SFRAME_GET_USER_SIGNED(fp_off, f, offset_size);
> +	}
> +
> +	if (offset_count)
> +		return -EINVAL;
> +
> +	frame->cfa_off = cfa_off;
> +	frame->ra_off = ra_off;
> +	frame->fp_off = fp_off;
> +	frame->use_fp = SFRAME_FRE_CFA_BASE_REG_ID(fre_info) == SFRAME_BASE_REG_FP;
> +
> +	return 0;
> +}
> +
> +int sframe_find(unsigned long ip, struct user_unwind_frame *frame)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct sframe_section *sec;
> +	struct sframe_fde fde;
> +	int srcu_idx;
> +	int ret = -EINVAL;
> +
> +	srcu_idx = srcu_read_lock(&sframe_srcu);
> +
> +	sec = mtree_load(&mm->sframe_mt, ip);
> +	if (!sec) {
> +		srcu_read_unlock(&sframe_srcu, srcu_idx);
> +		return -EINVAL;
> +	}
> +
> +
> +	ret = find_fde(sec, ip, &fde);
> +	if (ret)
> +		goto err_unlock;
> +
> +	ret = find_fre(sec, &fde, ip, frame);
> +	if (ret)
> +		goto err_unlock;
> +
> +	srcu_read_unlock(&sframe_srcu, srcu_idx);
> +	return 0;
> +
> +err_unlock:
> +	srcu_read_unlock(&sframe_srcu, srcu_idx);
> +	return ret;
> +}
> +
> +static int get_sframe_file(unsigned long sframe_addr, struct sframe_file *file)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *sframe_vma, *text_vma, *vma;
> +	VMA_ITERATOR(vmi, mm, 0);
> +
> +	mmap_read_lock(mm);
> +
> +	sframe_vma = vma_lookup(mm, sframe_addr);
> +	if (!sframe_vma || !sframe_vma->vm_file)
> +		goto err_unlock;
> +
> +	text_vma = NULL;
> +
> +	for_each_vma(vmi, vma) {
> +		if (vma->vm_file != sframe_vma->vm_file)
> +			continue;
> +		if (vma->vm_flags & VM_EXEC) {
> +			if (text_vma) {
> +				/*
> +				 * Multiple EXEC segments in a single file
> +				 * aren't currently supported, is that a thing?
> +				 */
> +				WARN_ON_ONCE(1);
> +				goto err_unlock;
> +			}
> +			text_vma = vma;
> +		}
> +	}
> +
> +	file->sframe_addr	= sframe_addr;
> +	file->text_start	= text_vma->vm_start;
> +	file->text_end		= text_vma->vm_end;
> +
> +	mmap_read_unlock(mm);
> +	return 0;
> +
> +err_unlock:
> +	mmap_read_unlock(mm);
> +	return -EINVAL;
> +}
> +
> +static int validate_sframe_addrs(struct sframe_file *file)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *text_vma;
> +
> +	mmap_read_lock(mm);
> +
> +	if (!vma_lookup(mm, file->sframe_addr))
> +		goto err_unlock;
> +
> +	text_vma = vma_lookup(mm, file->text_start);
> +	if (!(text_vma->vm_flags & VM_EXEC))
> +		goto err_unlock;
> +
> +	if (vma_lookup(mm, file->text_end-1) != text_vma)
> +		goto err_unlock;
> +
> +	mmap_read_unlock(mm);
> +	return 0;
> +
> +err_unlock:
> +	mmap_read_unlock(mm);
> +	return -EINVAL;
> +}
> +
> +int __sframe_add_section(struct sframe_file *file)
> +{
> +	struct maple_tree *sframe_mt = &current->mm->sframe_mt;
> +	struct sframe_section *sec;
> +	struct sframe_header shdr;
> +	unsigned long header_end;
> +	int ret;
> +
> +	if (copy_from_user(&shdr, (void *)file->sframe_addr, sizeof(shdr)))
> +		return -EFAULT;
> +
> +	if (shdr.preamble.magic != SFRAME_MAGIC ||
> +	    shdr.preamble.version != SFRAME_VERSION_1 ||
> +	    (!shdr.preamble.flags & SFRAME_F_FDE_SORTED) ||
> +	    shdr.auxhdr_len || !shdr.num_fdes || !shdr.num_fres ||
> +	    shdr.fdes_off > shdr.fres_off) {
> +		return -EINVAL;
> +	}
> +

I would say that it will be ideal to start the support with 
SFRAME_VERSION_2 onwards, if we have a choice.

The structure SFrame FDE in SFRAME_VERSION_1 was unaligned on-disk.  We 
fixed that in SFRAME_VERSION_2 (Binutils 2.41) by adding some padding as 
you have already noted. For x86_64, its not an issue though, yes.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ