[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <202405080052.21E569F@keescook>
Date: Wed, 8 May 2024 01:04:33 -0700
From: Kees Cook <keescook@...omium.org>
To: Vignesh Balasubramanian <vigbalas@....com>
Cc: linux-kernel@...r.kernel.org, linux-toolchains@...r.kernel.org,
mpe@...erman.id.au, npiggin@...il.com, christophe.leroy@...roup.eu,
aneesh.kumar@...nel.org, naveen.n.rao@...ux.ibm.com,
ebiederm@...ssion.com, x86@...nel.org,
linuxppc-dev@...ts.ozlabs.org, linux-mm@...ck.org, bpetkov@....com,
jinisusan.george@....com, matz@...e.de, binutils@...rceware.org,
jhb@...ebsd.org, felix.willgerodt@...el.com
Subject: Re: [PATCH v2 1/1] x86/elf: Add a new .note section containing
Xfeatures information to x86 core files
On Tue, May 07, 2024 at 03:23:31PM +0530, Vignesh Balasubramanian wrote:
> Add a new .note section containing type, size, offset and flags of
> every xfeature that is present.
>
> This information will be used by the debuggers to understand the XSAVE
> layout of the machine where the core file is dumped, and to read XSAVE
> registers, especially during cross-platform debugging.
>
> Some background:
>
> The XSAVE layouts of modern AMD and Intel CPUs differ, especially since
> Memory Protection Keys and the AVX-512 features have been incorporated into
> the AMD CPUs.
> This is because AMD never adopted (and hence never left room in the XSAVE
> layout for) the Intel MPX feature. Tools like GDB had assumed a fixed XSAVE
> layout matching that of Intel (based on the XCR0 mask).
> Hence, the core dumps from AMD CPUs didn't match the known size for the
> XCR0 mask. This resulted in GDB and other tools not being able to access
> the values of the AVX-512 and PKRU registers on AMD CPUs.
> To solve this, an interim solution has been accepted into GDB, and is
> already a part of GDB 14, thanks to this series of patches
> [ https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html ].
> But this patch series depends on heuristics based on the total XSAVE
> register set size and the XCR0 mask to infer the layouts of the various
> register blocks for core dumps, and hence, is not a foolproof mechanism to
> determine the layout of the XSAVE area.
>
> Hence this new core dump note has been proposed as a more robust mechanism
> to allow GDB/LLDB and other relevant tools to determine the layout of the
> XSAVE area of the machine where the corefile was dumped.
> The new core dump note (which is being proposed as a per-process .note
> section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures.
> Each structure describes an individual extended feature containing offset,
> size and flags (which are obtained through the CPUID instruction) in a format
> roughly matching the following C structure:
>
> struct xfeat_component {
> u32 xfeat_type;
> u32 xfeat_sz;
> u32 xfeat_off;
> u32 xfeat_flags;
> };
>
> Co-developed-by: Jini Susan George <jinisusan.george@....com>
> Signed-off-by: Jini Susan George <jinisusan.george@....com>
> Signed-off-by: Vignesh Balasubramanian <vigbalas@....com>
> ---
> v1->v2: Removed kernel internal defn dependency, code improvements
>
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/elf.h | 34 +++++++++
> arch/x86/kernel/fpu/xstate.c | 141 +++++++++++++++++++++++++++++++++++
> fs/binfmt_elf.c | 4 +-
> include/uapi/linux/elf.h | 1 +
> 5 files changed, 179 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 928820e61cb5..cc67daab3396 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -105,6 +105,7 @@ config X86
> select ARCH_HAS_DEBUG_WX
> select ARCH_HAS_ZONE_DMA_SET if EXPERT
> select ARCH_HAVE_NMI_SAFE_CMPXCHG
> + select ARCH_HAVE_EXTRA_ELF_NOTES
> select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
> select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
> select ARCH_MIGHT_HAVE_PC_PARPORT
> diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
> index 1fb83d47711f..5952574db64b 100644
> --- a/arch/x86/include/asm/elf.h
> +++ b/arch/x86/include/asm/elf.h
> @@ -13,6 +13,40 @@
> #include <asm/auxvec.h>
> #include <asm/fsgsbase.h>
>
> +struct xfeat_component {
> + u32 xfeat_type;
> + u32 xfeat_sz;
> + u32 xfeat_off;
> + u32 xfeat_flags;
> +} __packed;
> +
> +_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned");
> +
> +enum custom_feature {
> + FEATURE_XSAVE_FP = 0,
> + FEATURE_XSAVE_SSE = 1,
> + FEATURE_XSAVE_YMM = 2,
> + FEATURE_XSAVE_BNDREGS = 3,
> + FEATURE_XSAVE_BNDCSR = 4,
> + FEATURE_XSAVE_OPMASK = 5,
> + FEATURE_XSAVE_ZMM_Hi256 = 6,
> + FEATURE_XSAVE_Hi16_ZMM = 7,
> + FEATURE_XSAVE_PT = 8,
> + FEATURE_XSAVE_PKRU = 9,
> + FEATURE_XSAVE_PASID = 10,
> + FEATURE_XSAVE_CET_USER = 11,
> + FEATURE_XSAVE_CET_SHADOW_STACK = 12,
> + FEATURE_XSAVE_HDC = 13,
> + FEATURE_XSAVE_UINTR = 14,
> + FEATURE_XSAVE_LBR = 15,
> + FEATURE_XSAVE_HWP = 16,
> + FEATURE_XSAVE_XTILE_CFG = 17,
> + FEATURE_XSAVE_XTILE_DATA = 18,
> + FEATURE_MAX,
> + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,
> + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,
> +};
> +
> typedef unsigned long elf_greg_t;
>
> #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 33a214b1a4ce..3d1c3c96e34d 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -13,6 +13,7 @@
> #include <linux/seq_file.h>
> #include <linux/proc_fs.h>
> #include <linux/vmalloc.h>
> +#include <linux/coredump.h>
>
> #include <asm/fpu/api.h>
> #include <asm/fpu/regset.h>
> @@ -87,6 +88,8 @@ static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
> #define XSTATE_FLAG_SUPERVISOR BIT(0)
> #define XSTATE_FLAG_ALIGNED64 BIT(1)
>
> +static const char owner_name[] = "LINUX";
This needs to move under the CONFIG_COREDUMP below (so says the build
bots).
> +
> /*
> * Return whether the system supports a given xfeature.
> *
> @@ -1837,3 +1840,141 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
> return 0;
> }
> #endif /* CONFIG_PROC_PID_ARCH_STATUS */
> +
> +#ifdef CONFIG_COREDUMP
> +static int get_sub_leaf(int custom_xfeat)
Why is this "int"? I don't imagine there are negative features?
> +{
> + switch (custom_xfeat) {
> + case FEATURE_XSAVE_YMM: return XFEATURE_YMM;
> + case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS;
> + case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR;
> + case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK;
> + case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256;
> + case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM;
> + case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
> + case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU;
> + case FEATURE_XSAVE_PASID: return XFEATURE_PASID;
> + case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER;
> + case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED;
> + case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13;
> + case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14;
> + case FEATURE_XSAVE_LBR: return XFEATURE_LBR;
> + case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16;
> + case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG;
> + case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA;
> + default:
> + pr_warn_ratelimited("Not a valid XSAVE Feature.");
This isn't very friendly; it's keeping secrets about the unknown value. :)
Also it's missing a newline. How about:
pr_warn_ratelimited("Not a known XSAVE Feature: %u\n",
custom_xfeat);
> + return 0;
> + }
> +}
> +
> +/*
> + * Dump type, size, offset and flag values for every xfeature that is present.
> + */
> +static int dump_xsave_layout_desc(struct coredump_params *cprm)
> +{
> + u32 supported_features = 0;
> + struct xfeat_component xc;
> + u32 eax, ebx, ecx, edx;
> + int num_records = 0;
> + int sub_leaf = 0;
> + int i;
> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;
> +
> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {
> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf)) {
> + cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx);
> + xc.xfeat_type = i;
> + xc.xfeat_sz = eax;
> + xc.xfeat_off = ebx;
> + /* Reserved for future use */
> + xc.xfeat_flags = 0;
> +
> + if (!dump_emit(cprm, &xc,
> + sizeof(struct xfeat_component)))
> + return 0;
> + num_records++;
> + }
> + }
> +
> + return num_records;
> +}
> +
> +static int get_xsave_desc_size(void)
This can return u32: never negative.
> +{
> + int supported_features = 0;
> + int xfeatures_count = 0;
> + u32 eax, ebx, ecx, edx;
> + int sub_leaf = 0;
> + int i;
"i" can be u32 and then we can fix the get_sub_leaf() arg type.
> +
> + /* Find supported extended features */
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + supported_features = eax;
> +
> + for (i = FEATURE_XSAVE_EXTENDED_START;
> + i <= FEATURE_XSAVE_EXTENDED_END; i++) {
> + sub_leaf = get_sub_leaf(i);
> + if (!sub_leaf)
> + continue;
> + if (supported_features & (1U << sub_leaf))
> + xfeatures_count++;
> + }
> +
> + return xfeatures_count * (sizeof(struct xfeat_component));
> +}
> +
> +int elf_coredump_extra_notes_write(struct coredump_params *cprm)
> +{
> + int num_records = 0;
> + struct elf_note en;
> +
> + en.n_namesz = sizeof(owner_name);
> + en.n_descsz = get_xsave_desc_size();
> + en.n_type = NT_X86_XSAVE_LAYOUT;
> +
> + if (!dump_emit(cprm, &en, sizeof(en)))
> + return 1;
> + if (!dump_emit(cprm, owner_name, en.n_namesz))
> + return 1;
> + if (!dump_align(cprm, 4))
> + return 1;
> +
> + num_records = dump_xsave_layout_desc(cprm);
> + if (!num_records) {
> + pr_warn_ratelimited("Error adding XSTATE layout ELF note. XSTATE buffer in the core file will be unparseable.");
Missing trailing newline.
> + return 1;
> + }
> +
> + /* Total size should be equal to the number of records */
> + if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz) {
> + pr_warn_ratelimited("Error adding XSTATE layout ELF note. The size of the .note section does not match with the total size of the records.");
Same.
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Return the size of new note.
> + */
> +int elf_coredump_extra_notes_size(void)
> +{
> + int size = 0;
> +
> + /* NOTE Header */
> + size += sizeof(struct elf_note);
> + /* name + align */
> + size += roundup(sizeof(owner_name), 4);
> + size += get_xsave_desc_size();
> +
> + return size;
> +}
> +#endif
Since it's a long if/endif, add: /* CONFIG_COREDUMP */ after the endif
here.
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 5397b552fbeb..833bcb7e957b 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -2000,7 +2000,7 @@ static int elf_core_dump(struct coredump_params *cprm)
> {
> size_t sz = info.size;
>
> - /* For cell spufs */
> + /* For cell spufs and x86 xstate */
> sz += elf_coredump_extra_notes_size();
>
> phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
> @@ -2064,7 +2064,7 @@ static int elf_core_dump(struct coredump_params *cprm)
> if (!write_note_info(&info, cprm))
> goto end_coredump;
>
> - /* For cell spufs */
> + /* For cell spufs and x86 xstate */
> if (elf_coredump_extra_notes_write(cprm))
> goto end_coredump;
>
> diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
> index b54b313bcf07..e30a9b47dc87 100644
> --- a/include/uapi/linux/elf.h
> +++ b/include/uapi/linux/elf.h
> @@ -411,6 +411,7 @@ typedef struct elf64_shdr {
> #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
> /* Old binutils treats 0x203 as a CET state */
> #define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
> +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
> #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
> #define NT_S390_TIMER 0x301 /* s390 timer register */
> #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
> --
> 2.34.1
>
Otherwise looks good. I'd like to see feedback from Intel folks too.
Thanks for working on this!
-Kees
--
Kees Cook
Powered by blists - more mailing lists