[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b67e091d-7be0-7f8b-4ef2-0a97936a1d34@ghiti.fr>
Date: Mon, 10 Jan 2022 09:05:02 +0100
From: Alexandre ghiti <alex@...ti.fr>
To: Palmer Dabbelt <palmer@...belt.com>
Cc: alexandre.ghiti@...onical.com, mpe@...erman.id.au,
benh@...nel.crashing.org, paulus@...ba.org,
Paul Walmsley <paul.walmsley@...ive.com>,
aou@...s.berkeley.edu, linuxppc-dev@...ts.ozlabs.org,
linux-kernel@...r.kernel.org, linux-riscv@...ts.infradead.org
Subject: Re: [PATCH v7 1/3] riscv: Introduce CONFIG_RELOCATABLE
Hi Palmer,
Do you think this could go in for-next?
Thanks,
Alex
On 12/6/21 10:44, Alexandre ghiti wrote:
> @Palmer, is there anything I can do to help get this pulled into 5.17?
>
> Thanks,
>
> Alex
>
> On 10/27/21 07:04, Alexandre ghiti wrote:
>> Hi Palmer,
>>
>> On 10/26/21 11:29 PM, Palmer Dabbelt wrote:
>>> On Sat, 09 Oct 2021 10:20:20 PDT (-0700), alex@...ti.fr wrote:
>>>> Arf, I sent this patchset from the wrong email address. @Palmer,
>>>> tell me if you want me to resend it from the correct one.
>>> Sorry for being kind of slow here. It's fine: there's a "From:" in
>>> the patch, and git picks those up so it'll match the signed-off-by
>>> line. I send pretty much all my patches that way, as I never managed
>>> to get my Google address working correctly.
>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>>
>>>> On 10/9/21 7:12 PM, Alexandre Ghiti wrote:
>>>>> From: Alexandre Ghiti <alex@...ti.fr>
>>>>>
>>>>> This config allows compiling a 64-bit kernel as a PIE and relocating
>>>>> it to any virtual address at runtime: this paves the way to KASLR.
>>>>> Runtime relocation is possible since relocation metadata is embedded
>>>>> into the kernel.
>>> IMO this should really be user selectable, at a bare minimum so it's
>>> testable.
>>> I just sent along a patch to do that (my power's off at home, so email
>>> is a bit wacky right now).
>>>
>>> I haven't put this on for-next yet as I'm not sure if you had a fix
>>> for the kasan issue (which IIUC would conflict with this).
>>
>> The kasan issue only revealed that I need to move the kasan shadow
>> memory around with sv48 support; that's not related to the relocatable
>> kernel.
>>
>> Thanks,
>>
>> Alex
>>
>>
>>>>> Note that relocating at runtime introduces an overhead even if the
>>>>> kernel is loaded at the same address it was linked at, and that the
>>>>> compiler options are those used in arm64, which uses the same RELA
>>>>> relocation format.
>>>>>
>>>>> Signed-off-by: Alexandre Ghiti <alex@...ti.fr>
>>>>> ---
>>>>> arch/riscv/Kconfig | 12 ++++++++
>>>>> arch/riscv/Makefile | 7 +++--
>>>>> arch/riscv/kernel/vmlinux.lds.S | 6 ++++
>>>>> arch/riscv/mm/Makefile | 4 +++
>>>>> arch/riscv/mm/init.c | 54
>>>>> ++++++++++++++++++++++++++++++++-
>>>>> 5 files changed, 80 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>> index ea16fa2dd768..043ba92559fa 100644
>>>>> --- a/arch/riscv/Kconfig
>>>>> +++ b/arch/riscv/Kconfig
>>>>> @@ -213,6 +213,18 @@ config PGTABLE_LEVELS
>>>>> config LOCKDEP_SUPPORT
>>>>> def_bool y
>>>>>
>>>>> +config RELOCATABLE
>>>>> + bool
>>>>> + depends on MMU && 64BIT && !XIP_KERNEL
>>>>> + help
>>>>> + This builds a kernel as a Position Independent Executable
>>>>> (PIE),
>>>>> + which retains all relocation metadata required to
>>>>> relocate the
>>>>> + kernel binary at runtime to a different virtual address
>>>>> than the
>>>>> + address it was linked at.
>>>>> + Since RISCV uses the RELA relocation format, this
>>>>> requires a
>>>>> + relocation pass at runtime even if the kernel is loaded
>>>>> at the
>>>>> + same address it was linked at.
>>>>> +
>>>>> source "arch/riscv/Kconfig.socs"
>>>>> source "arch/riscv/Kconfig.erratas"
>>>>>
>>>>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>>>>> index 0eb4568fbd29..2f509915f246 100644
>>>>> --- a/arch/riscv/Makefile
>>>>> +++ b/arch/riscv/Makefile
>>>>> @@ -9,9 +9,12 @@
>>>>> #
>>>>>
>>>>> OBJCOPYFLAGS := -O binary
>>>>> -LDFLAGS_vmlinux :=
>>>>> +ifeq ($(CONFIG_RELOCATABLE),y)
>>>>> + LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>>>>> + KBUILD_CFLAGS += -fPIE
>>>>> +endif
>>>>> ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>>>>> - LDFLAGS_vmlinux := --no-relax
>>>>> + LDFLAGS_vmlinux += --no-relax
>>>>> KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>>>> CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>>>> endif
>>>>> diff --git a/arch/riscv/kernel/vmlinux.lds.S
>>>>> b/arch/riscv/kernel/vmlinux.lds.S
>>>>> index 5104f3a871e3..862a8c09723c 100644
>>>>> --- a/arch/riscv/kernel/vmlinux.lds.S
>>>>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>>>>> @@ -133,6 +133,12 @@ SECTIONS
>>>>>
>>>>> BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
>>>>>
>>>>> + .rela.dyn : ALIGN(8) {
>>>>> + __rela_dyn_start = .;
>>>>> + *(.rela .rela*)
>>>>> + __rela_dyn_end = .;
>>>>> + }
>>>>> +
>>>>> #ifdef CONFIG_EFI
>>>>> . = ALIGN(PECOFF_SECTION_ALIGNMENT);
>>>>> __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
>>>>> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
>>>>> index 7ebaef10ea1b..2d33ec574bbb 100644
>>>>> --- a/arch/riscv/mm/Makefile
>>>>> +++ b/arch/riscv/mm/Makefile
>>>>> @@ -1,6 +1,10 @@
>>>>> # SPDX-License-Identifier: GPL-2.0-only
>>>>>
>>>>> CFLAGS_init.o := -mcmodel=medany
>>>>> +ifdef CONFIG_RELOCATABLE
>>>>> +CFLAGS_init.o += -fno-pie
>>>>> +endif
>>>>> +
>>>>> ifdef CONFIG_FTRACE
>>>>> CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>>>>> CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
>>>>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>>>>> index c0cddf0fc22d..42041c12d496 100644
>>>>> --- a/arch/riscv/mm/init.c
>>>>> +++ b/arch/riscv/mm/init.c
>>>>> @@ -20,6 +20,9 @@
>>>>> #include <linux/dma-map-ops.h>
>>>>> #include <linux/crash_dump.h>
>>>>> #include <linux/hugetlb.h>
>>>>> +#ifdef CONFIG_RELOCATABLE
>>>>> +#include <linux/elf.h>
>>>>> +#endif
>>>>>
>>>>> #include <asm/fixmap.h>
>>>>> #include <asm/tlbflush.h>
>>>>> @@ -103,7 +106,7 @@ static void __init print_vm_layout(void)
>>>>> print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
>>>>> (unsigned long)high_memory);
>>>>> #ifdef CONFIG_64BIT
>>>>> - print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
>>>>> + print_mlm("kernel", (unsigned long)kernel_map.virt_addr,
>>>>> (unsigned long)ADDRESS_SPACE_END);
>>>>> #endif
>>>>> }
>>>>> @@ -518,6 +521,44 @@ static __init pgprot_t pgprot_from_va(uintptr_t
>>>>> va)
>>>>> #error "setup_vm() is called from head.S before relocate so it
>>>>> should not use absolute addressing."
>>>>> #endif
>>>>>
>>>>> +#ifdef CONFIG_RELOCATABLE
>>>>> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
>>>>> +
>>>>> +static void __init relocate_kernel(void)
>>>>> +{
>>>>> + Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
>>>>> + /*
>>>>> + * This holds the offset between the linked virtual address and
>>>>> the
>>>>> + * relocated virtual address.
>>>>> + */
>>>>> + uintptr_t reloc_offset = kernel_map.virt_addr -
>>>>> KERNEL_LINK_ADDR;
>>>>> + /*
>>>>> + * This holds the offset between kernel linked virtual
>>>>> address and
>>>>> + * physical address.
>>>>> + */
>>>>> + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR -
>>>>> kernel_map.phys_addr;
>>>>> +
>>>>> + for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
>>>>> + Elf64_Addr addr = (rela->r_offset -
>>>>> va_kernel_link_pa_offset);
>>>>> + Elf64_Addr relocated_addr = rela->r_addend;
>>>>> +
>>>>> + if (rela->r_info != R_RISCV_RELATIVE)
>>>>> + continue;
>>>>> +
>>>>> + /*
>>>>> + * Make sure to not relocate vdso symbols like rt_sigreturn
>>>>> + * which are linked from the address 0 in vmlinux since
>>>>> + * vdso symbol addresses are actually used as an offset from
>>>>> + * mm->context.vdso in VDSO_OFFSET macro.
>>>>> + */
>>>>> + if (relocated_addr >= KERNEL_LINK_ADDR)
>>>>> + relocated_addr += reloc_offset;
>>>>> +
>>>>> + *(Elf64_Addr *)addr = relocated_addr;
>>>>> + }
>>>>> +}
>>>>> +#endif /* CONFIG_RELOCATABLE */
>>>>> +
>>>>> #ifdef CONFIG_XIP_KERNEL
>>>>> static void __init create_kernel_page_table(pgd_t *pgdir,
>>>>> __always_unused bool early)
>>>>> @@ -625,6 +666,17 @@ asmlinkage void __init setup_vm(uintptr_t
>>>>> dtb_pa)
>>>>> BUG_ON((kernel_map.virt_addr + kernel_map.size) >
>>>>> ADDRESS_SPACE_END - SZ_4K);
>>>>> #endif
>>>>>
>>>>> +#ifdef CONFIG_RELOCATABLE
>>>>> + /*
>>>>> + * Early page table uses only one PGDIR, which makes it possible
>>>>> + * to map PGDIR_SIZE aligned on PGDIR_SIZE: if the relocation
>>>>> offset
>>>>> + * makes the kernel cross over a PGDIR_SIZE boundary, raise a
>>>>> bug
>>>>> + * since a part of the kernel would not get mapped.
>>>>> + */
>>>>> + BUG_ON(PGDIR_SIZE - (kernel_map.virt_addr & (PGDIR_SIZE - 1)) <
>>>>> kernel_map.size);
>>>>> + relocate_kernel();
>>>>> +#endif
>>>>> +
>>>>> pt_ops.alloc_pte = alloc_pte_early;
>>>>> pt_ops.get_pte_virt = get_pte_virt_early;
>>>>> #ifndef __PAGETABLE_PMD_FOLDED
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@...ts.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Powered by blists - more mailing lists