[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221106064434.zrx5wjyzxtjgc2ly@gmail.com>
Date: Sat, 5 Nov 2022 23:44:34 -0700
From: Fangrui Song <i@...kray.me>
To: Pedro Falcato <pedro.falcato@...il.com>
Cc: linux-kernel@...r.kernel.org, Kees Cook <keescook@...omium.org>,
linux-mm@...ck.org, sam@...too.org,
Alexander Viro <viro@...iv.linux.org.uk>,
Eric Biederman <ebiederm@...ssion.com>,
linux-fsdevel@...r.kernel.org, Rich Felker <dalias@...c.org>
Subject: Re: [PATCH] fs/binfmt_elf: Fix memsz > filesz handling
On 2022-11-06, Pedro Falcato wrote:
>The old code for ELF interpreter loading could only handle
>1 memsz > filesz segment. This is incorrect, as evidenced
>by the elf program loading code, which could handle multiple
>such segments.
>
>This patch fixes memsz > filesz handling for elf interpreters
>and refactors interpreter/program BSS clearing into a common
>codepath.
>
>This bug was uncovered on builds of ppc64le musl libc with
>llvm lld 15.0.0, since ppc64 does not allocate file space
>for its .plt.
>
>Cc: Rich Felker <dalias@...c.org>
>Signed-off-by: Pedro Falcato <pedro.falcato@...il.com>
>---
> fs/binfmt_elf.c | 170 ++++++++++++++++--------------------------------
> 1 file changed, 56 insertions(+), 114 deletions(-)
>
>diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
>index 6a11025e585..ca2961d80fa 100644
>--- a/fs/binfmt_elf.c
>+++ b/fs/binfmt_elf.c
>@@ -109,25 +109,6 @@ static struct linux_binfmt elf_format = {
>
> #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
>
>-static int set_brk(unsigned long start, unsigned long end, int prot)
>-{
>- start = ELF_PAGEALIGN(start);
>- end = ELF_PAGEALIGN(end);
>- if (end > start) {
>- /*
>- * Map the last of the bss segment.
>- * If the header is requesting these pages to be
>- * executable, honour that (ppc32 needs this).
>- */
>- int error = vm_brk_flags(start, end - start,
>- prot & PROT_EXEC ? VM_EXEC : 0);
>- if (error)
>- return error;
>- }
>- current->mm->start_brk = current->mm->brk = end;
>- return 0;
>-}
>-
> /* We need to explicitly zero any fractional pages
> after the data section (i.e. bss). This would
> contain the junk from the file that should not
>@@ -584,6 +565,41 @@ static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
> return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
> }
>
>+static int zero_bss(unsigned long start, unsigned long end, int prot)
>+{
>+ /*
>+ * First pad the last page from the file up to
>+ * the page boundary, and zero it from elf_bss up to the end of the page.
>+ */
>+ if (padzero(start))
>+ return -EFAULT;
>+
>+ /*
>+ * Next, align both the file and mem bss up to the page size,
>+ * since this is where elf_bss was just zeroed up to, and where
>+ * last_bss will end after the vm_brk_flags() below.
>+ */
>+
>+ start = ELF_PAGEALIGN(start);
>+ end = ELF_PAGEALIGN(end);
>+
>+ /* Finally, if there is still more bss to allocate, do it. */
>+
>+ return (end > start ? vm_brk_flags(start, end - start,
>+ prot & PROT_EXEC ? VM_EXEC : 0) : 0);
>+}
>+
>+static int set_brk(unsigned long start, unsigned long end, int prot)
>+{
>+ int error = zero_bss(start, end, prot);
>+
>+ if (error < 0)
>+ return error;
>+
>+ current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(end);
>+ return 0;
>+}
>+
> /* This is much more generalized than the library routine read function,
> so we keep this separate. Technically the library read function
> is only provided so that we can read a.out libraries that have
>@@ -597,8 +613,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
> struct elf_phdr *eppnt;
> unsigned long load_addr = 0;
> int load_addr_set = 0;
>- unsigned long last_bss = 0, elf_bss = 0;
>- int bss_prot = 0;
> unsigned long error = ~0UL;
> unsigned long total_size;
> int i;
>@@ -662,50 +676,21 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
> goto out;
> }
>
>- /*
>- * Find the end of the file mapping for this phdr, and
>- * keep track of the largest address we see for this.
>- */
>- k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
>- if (k > elf_bss)
>- elf_bss = k;
>+ if (eppnt->p_memsz > eppnt->p_filesz) {
>+ /*
>+ * Handle BSS zeroing and mapping
>+ */
>+ unsigned long start = load_addr + vaddr + eppnt->p_filesz;
>+ unsigned long end = load_addr + vaddr + eppnt->p_memsz;
>
>- /*
>- * Do the same thing for the memory mapping - between
>- * elf_bss and last_bss is the bss section.
>- */
>- k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
>- if (k > last_bss) {
>- last_bss = k;
>- bss_prot = elf_prot;
>+ error = zero_bss(start, end, elf_prot);
>+
>+ if (error < 0)
>+ goto out;
> }
> }
> }
>
>- /*
>- * Now fill out the bss section: first pad the last page from
>- * the file up to the page boundary, and zero it from elf_bss
>- * up to the end of the page.
>- */
>- if (padzero(elf_bss)) {
>- error = -EFAULT;
>- goto out;
>- }
>- /*
>- * Next, align both the file and mem bss up to the page size,
>- * since this is where elf_bss was just zeroed up to, and where
>- * last_bss will end after the vm_brk_flags() below.
>- */
>- elf_bss = ELF_PAGEALIGN(elf_bss);
>- last_bss = ELF_PAGEALIGN(last_bss);
>- /* Finally, if there is still more bss to allocate, do it. */
>- if (last_bss > elf_bss) {
>- error = vm_brk_flags(elf_bss, last_bss - elf_bss,
>- bss_prot & PROT_EXEC ? VM_EXEC : 0);
>- if (error)
>- goto out;
>- }
>-
> error = load_addr;
> out:
> return error;
>@@ -829,8 +814,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
> unsigned long error;
> struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
> struct elf_phdr *elf_property_phdata = NULL;
>- unsigned long elf_bss, elf_brk;
>- int bss_prot = 0;
> int retval, i;
> unsigned long elf_entry;
> unsigned long e_entry;
>@@ -1020,9 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
> executable_stack);
> if (retval < 0)
> goto out_free_dentry;
>-
>- elf_bss = 0;
>- elf_brk = 0;
>
> start_code = ~0UL;
> end_code = 0;
>@@ -1041,33 +1021,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
> if (elf_ppnt->p_type != PT_LOAD)
> continue;
>
>- if (unlikely (elf_brk > elf_bss)) {
>- unsigned long nbyte;
>-
>- /* There was a PT_LOAD segment with p_memsz > p_filesz
>- before this one. Map anonymous pages, if needed,
>- and clear the area. */
>- retval = set_brk(elf_bss + load_bias,
>- elf_brk + load_bias,
>- bss_prot);
>- if (retval)
>- goto out_free_dentry;
>- nbyte = ELF_PAGEOFFSET(elf_bss);
>- if (nbyte) {
>- nbyte = ELF_MIN_ALIGN - nbyte;
>- if (nbyte > elf_brk - elf_bss)
>- nbyte = elf_brk - elf_bss;
>- if (clear_user((void __user *)elf_bss +
>- load_bias, nbyte)) {
>- /*
>- * This bss-zeroing can fail if the ELF
>- * file specifies odd protections. So
>- * we don't check the return value
>- */
>- }
>- }
>- }
>-
> elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
> !!interpreter, false);
>
>@@ -1211,41 +1164,30 @@ static int load_elf_binary(struct linux_binprm *bprm)
>
> k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
>
>- if (k > elf_bss)
>- elf_bss = k;
>+
>+ if (elf_ppnt->p_memsz > elf_ppnt->p_filesz) {
>+ unsigned long seg_end = elf_ppnt->p_vaddr +
>+ elf_ppnt->p_memsz + load_bias;
>+ retval = set_brk(k + load_bias,
>+ seg_end,
>+ elf_prot);
>+ if (retval)
>+ goto out_free_dentry;
>+ }
>+
> if ((elf_ppnt->p_flags & PF_X) && end_code < k)
> end_code = k;
> if (end_data < k)
> end_data = k;
>- k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
>- if (k > elf_brk) {
>- bss_prot = elf_prot;
>- elf_brk = k;
>- }
> }
>
> e_entry = elf_ex->e_entry + load_bias;
> phdr_addr += load_bias;
>- elf_bss += load_bias;
>- elf_brk += load_bias;
> start_code += load_bias;
> end_code += load_bias;
> start_data += load_bias;
> end_data += load_bias;
>
>- /* Calling set_brk effectively mmaps the pages that we need
>- * for the bss and break sections. We must do this before
>- * mapping in the interpreter, to make sure it doesn't wind
>- * up getting placed where the bss needs to go.
>- */
>- retval = set_brk(elf_bss, elf_brk, bss_prot);
>- if (retval)
>- goto out_free_dentry;
>- if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
>- retval = -EFAULT; /* Nobody gets to see this, but.. */
>- goto out_free_dentry;
>- }
>-
> if (interpreter) {
> elf_entry = load_elf_interp(interp_elf_ex,
> interpreter,
>--
>2.38.1
I have a write-up about the issue: https://maskray.me/blog/2022-11-05-lld-musl-powerpc64
and used a `.section .toc,"aw",@nobits` trick to verify that this patch
makes a binary with two RW PT_LOAD segments (p_filesz < p_memsz) work.
Reviewed-by: Fangrui Song <i@...kray.me>
Tested-by: Fangrui Song <i@...kray.me>
Powered by blists - more mailing lists