[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <05e9c473-e8da-4424-9322-70da66f73fb6@t-8ch.de>
Date: Sun, 1 Feb 2026 17:42:01 +0100
From: Thomas Weißschuh <linux@...ssschuh.net>
To: Daniel Palmer <daniel@...ngy.jp>
Cc: w@....eu, kees@...nel.org, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 3/9] tools/nolibc: Add basic ELF self-relocation
support for static PIE
On 2026-01-31 16:44:34+0900, Daniel Palmer wrote:
> Currently nolibc programs cannot be compiled with -static-pie.
> Which is basically no shared libraries, no interpreter, but contain
> relocation information in the ELF to allow the program to be fixed
> up to run at the address that the kernel loaded it to.
>
> There might be use cases for static PIE but mine is for nommu.
> The ELF FDPIC loader can actually load normal ELFs as long as they
> can be relocated.
>
> This very basic implementation does the following:
>
> - Works out if we are PIE and need to be relocated. ELF type == ET_DYN
> - Works out if we are static PIE, have no interpreter, and need to
> relocate ourselves.
> - Calculates the base address using the location of the program
> headers. This is probably not correct.
> - Finds the ELF relocation data.
> - Calls an arch specific function to handle each of the relocations.
>
> Note that from testing a lot of archs don't produce static PIE
> binaries with the -static-pie option and you need to compile with
> -pie -Wl,--no-dynamic-linker to get a static PIE binary.
>
> Currently REL and RELA formats are supported.
>
> Signed-off-by: Daniel Palmer <daniel@...ngy.jp>
> ---
> tools/include/nolibc/Makefile | 1 +
> tools/include/nolibc/crt.h | 7 +
> tools/include/nolibc/reloc.h | 240 ++++++++++++++++++++++++++++++++++
> 3 files changed, 248 insertions(+)
> create mode 100644 tools/include/nolibc/reloc.h
>
> diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
> index 8118e22844f1..2b968a097854 100644
> --- a/tools/include/nolibc/Makefile
> +++ b/tools/include/nolibc/Makefile
> @@ -38,6 +38,7 @@ all_files := \
> math.h \
> nolibc.h \
> poll.h \
> + reloc.h \
> sched.h \
> signal.h \
> stackprotector.h \
> diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
> index 899062c00fb7..3c1c8d738ac7 100644
> --- a/tools/include/nolibc/crt.h
> +++ b/tools/include/nolibc/crt.h
> @@ -10,6 +10,7 @@
> #ifndef NOLIBC_NO_RUNTIME
>
> #include "compiler.h"
> +#include "reloc.h"
>
> char **environ __attribute__((weak));
> const unsigned long *_auxv __attribute__((weak));
> @@ -100,6 +101,12 @@ void __no_stack_protector _start_c(long *sp)
> for (auxv = (void *)envp; *auxv++;)
> ;
>
> + /*
> + * Do relocation if required and supported, this must happen before any
> + * global variables are updated or used.
> + */
> + _relocate(auxv);
> +
> __start_c(argc, argv, envp, auxv);
> }
>
> diff --git a/tools/include/nolibc/reloc.h b/tools/include/nolibc/reloc.h
> new file mode 100644
> index 000000000000..98c42af6f845
> --- /dev/null
> +++ b/tools/include/nolibc/reloc.h
> @@ -0,0 +1,240 @@
> +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
> +/*
> + * Self relocation support for NOLIBC
> + * Copyright (C) 2026 Daniel Palmer<daniel@...ngy.jp>
> + *
> + * This allows a PIE compiled nolibc binary to relocate itself
> + * instead of relying on a dynamic linker. So called "static PIE".
> + * With some care binaries produced with this relocation code
> + * can run via the FDPIC ELF loader on nommu systems.
> + *
> + * I am not an expert in all of the different options for GCC but
> + * this works for me for x86:
> + * gcc -nostdlib -fpie -Os -include <path to nolibc.h> \
> + * -static-pie -o helloworld helloworld.c
> + *
> + * For some targets -static-pie doesn't work but setting PIE and
> + * then disabling the linker results in a static-pie:
> + * gcc -nostdlib -fpie -Os -include <path to nolibc.h> \
> + * -Wl,--no-dynamic-linker -pie -o helloworld helloworld.c
These last few paragraphs don't belong here IMO.
> + */
> +
> +#ifndef _NOLIBC_RELOC_H
> +#define _NOLIBC_RELOC_H
> +
> +#ifdef NOLIBC_ARCH_HAS_RELOC
Internal symbols which are not meant to be provided by the user should
use an underscore prefix.
> +#include "elf.h"
> +#include <linux/auxvec.h>
> +
> +#ifdef NOLIBC_ARCH_ELF32
> +#define elf_ehdr Elf32_Ehdr
> +#define elf_phdr Elf32_Phdr
> +/* 32bit ARM, x86 uses REL instead of RELA */
> +#ifdef NOLIBC_ARCH_ELF_REL
> +#define elf_rel Elf32_Rel
> +#else
> +#define elf_rela Elf32_Rela
> +#endif
> +#define elf_dyn Elf32_Dyn
> +#define elf_addr Elf32_Addr
> +#define elf_r_type(_x) ELF32_R_TYPE(_x)
> +#else
> +#define elf_ehdr Elf64_Ehdr
> +#define elf_phdr Elf64_Phdr
> +#define elf_dyn Elf64_Dyn
> +#define elf_rela Elf64_Rela
> +#define elf_addr Elf64_Addr
> +#define elf_r_type(_x) ELF64_R_TYPE(_x)
> +#endif
These symbols are polluting the global namespace.
They should also use some underscore and 'nolibc' prefix.
> +
> +#ifdef NOLIBC_ARCH_ELF_REL
> +/*
> + * Your arch needs to provide this to actually handle doing each of the
> + * relocations if it uses the REL format.
> + */
> +static int __relocate_rel(unsigned long base, elf_rel *entry);
> +
> +/* Generic implementation of R_x_RELATIVE for REL */
> +#define __relocate_rel_relative(_base, _entry) \
> + do { \
> + elf_addr *_addr; \
> + int addend; \
> + \
> + _addr = (elf_addr *)(_base + _entry->r_offset); \
> + addend = *_addr; \
> + *_addr = _base + addend; \
> + } while (0)
> +
> +static int __relocate(unsigned long base,
> + unsigned long rel_off,
> + unsigned long rel_count)
> +{
> + elf_rel *rel = (elf_rel *)(base + rel_off);
> + unsigned long i;
> +
> + for (i = 0; i < rel_count; i++) {
> + if (__relocate_rel(base, &rel[i]))
> + return -1;
> + }
> +
> + return 0;
> +}
> +#else
> +/*
> + * Your arch needs to provide this to actually handle doing each of the
> + * relocations if it uses the RELA format.
> + */
> +static int __relocate_rela(unsigned long base, elf_rela *entry);
> +
> +/* Generic implementation of R_x_RELATIVE for RELA */
> +#define __relocate_rela_relative(_base, _entry) \
> + do { \
> + elf_addr *_addr; \
> + \
> + _addr = (elf_addr *)(_base + _entry->r_offset); \
> + *_addr = (elf_addr) (_base + _entry->r_addend); \
> + } while (0)
Does this need to be a macro?
> +
> +static int __relocate(unsigned long base,
> + unsigned long rela_off,
> + unsigned long rela_count)
> +{
> + elf_rela *rela = (elf_rela *)(base + rela_off);
> + unsigned long i;
> +
> + for (i = 0; i < rela_count; i++) {
> + if (__relocate_rela(base, &rela[i]))
> + return -1;
> + }
> +
> + return 0;
> +}
> +#endif
> +
> +static void _relocate(const unsigned long *auxv)
> +{
> + unsigned long rel_rela_count = 0;
> + unsigned long rel_rela_off = 0;
> + unsigned long phdr_addr = 0;
> + unsigned long phdr_num = 0;
> + unsigned long phdr_sz = 0;
> + elf_phdr *phdr_dyn = NULL;
> + unsigned long base;
> + unsigned long i;
> + int remaining;
> + elf_ehdr *ehdr;
> + elf_dyn *dyn;
> +
> + for (remaining = 3; remaining; ) {
> + if (!auxv[0] && !auxv[1])
> + break;
> +
> + switch (auxv[0]) {
> + case AT_NOTELF:
> + return;
> +
> + case AT_PHDR:
> + phdr_addr = auxv[1];
> + remaining--;
> + break;
> +
> + case AT_PHNUM:
> + phdr_num = auxv[1];
> + remaining--;
> + break;
> +
> + /*
> + * Not sure if this is even needed, should match
> + * the size of the program header type?
> + */
> + case AT_PHENT:
> + phdr_sz = auxv[1];
> + remaining--;
> + break;
> + }
I think we either ignore this or abort on any mismatch.
> +
> + auxv += 2;
> + }
> +
> + if (remaining)
> + goto failed;
> +
> + /*
> + * Everything I could find said that the way to find the base for relocation
> + * should be done by searching for the first PT_LOAD and then using the offset
> + * of that against the address of the program headers. So FIXME.
> + */
> + base = phdr_addr - sizeof(elf_ehdr);
What was wrong with AT_BASE?
> +
> + /* Check that we are PIE */
> + ehdr = (elf_ehdr *) base;
> + if (ehdr->e_type != ET_DYN)
> + return;
> +
> + for (i = 0, remaining = 1; (i < phdr_num) && remaining; i++) {
> + elf_phdr *phdr = (elf_phdr *)(phdr_addr + (phdr_sz * i));
> +
> + switch (phdr->p_type) {
> + case PT_INTERP:
> + /* Interp was set, we were relocated already?, return */
> + return;
> + case PT_DYNAMIC:
> + phdr_dyn = phdr;
> + remaining--;
> + break;
> + }
> + }
> +
> + if (!phdr_dyn)
> + goto failed;
> +
> + dyn = (elf_dyn *)(base + phdr_dyn->p_offset);
> + for (; dyn->d_tag != DT_NULL; dyn++) {
> + switch (dyn->d_tag) {
> +#ifdef NOLIBC_ARCH_ELF_REL
> + case DT_REL:
> + rel_rela_off = dyn->d_un.d_ptr;
> + break;
> + case DT_RELCOUNT:
> + rel_rela_count = dyn->d_un.d_val;
> + break;
> + }
> +#else
> + case DT_RELA:
> + rel_rela_off = dyn->d_un.d_ptr;
> + break;
> + case DT_RELACOUNT:
> + rel_rela_count = dyn->d_un.d_val;
> + break;
> + }
> +#endif
> +
> + /* Got what we came for, exit loop */
> + if (rel_rela_off && rel_rela_count)
> + break;
> + }
> +
> + if (!rel_rela_off || !rel_rela_count)
> + goto failed;
> +
> + if (__relocate(base, rel_rela_off, rel_rela_count))
> + goto failed;
> +
> + return;
> +
> +failed:
> + __builtin_trap();
> +}
> +#else
> +static void _relocate(const unsigned long *auxv __attribute__((unused)))
> +{
> + /*
> + * Maybe if you build a program that needs relocation
> + * but it's not supported detect that and trap here.
> + * But for now trust that people know what they are doing.
> + */
> +}
> +#endif /* NOLIBC_ARCH_HAS_RELOC */
> +
> +#endif /* _NOLIBC_RELOC_H */
> --
> 2.51.0
>
Powered by blists - more mailing lists