lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ab46f9a28693c10995f9628557bd212e@kernel.org>
Date:   Tue, 24 Nov 2020 13:09:21 +0000
From:   Marc Zyngier <maz@...nel.org>
To:     David Brazdil <dbrazdil@...gle.com>
Cc:     kvmarm@...ts.cs.columbia.edu, linux-arm-kernel@...ts.infradead.org,
        linux-kernel@...r.kernel.org, James Morse <james.morse@....com>,
        Julien Thierry <julien.thierry.kdev@...il.com>,
        Suzuki K Poulose <suzuki.poulose@....com>,
        Catalin Marinas <catalin.marinas@....com>,
        Will Deacon <will@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        Andrew Scull <ascull@...gle.com>,
        Ard Biesheuvel <ardb@...nel.org>, kernel-team@...roid.com
Subject: Re: [RFC PATCH 2/6] kvm: arm64: Fix up RELA relocations in hyp
 code/data

On 2020-11-19 16:25, David Brazdil wrote:
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
> 
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
> 
> This is done after kvm_compute_layout and before apply_alternatives.
> 
> Signed-off-by: David Brazdil <dbrazdil@...gle.com>
> ---
>  arch/arm64/include/asm/kvm_mmu.h |  1 +
>  arch/arm64/kernel/smp.c          |  4 +-
>  arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
>  void kvm_update_va_mask(struct alt_instr *alt,
>  			__le32 *origptr, __le32 *updptr, int nr_inst);
>  void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
> 
>  static __always_inline unsigned long __kern_hyp_va(unsigned long v)
>  {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
>  			   "CPU: CPUs started in inconsistent modes");
>  	else
>  		pr_info("CPU: All CPU(s) started at EL1\n");
> -	if (IS_ENABLED(CONFIG_KVM))
> +	if (IS_ENABLED(CONFIG_KVM)) {
>  		kvm_compute_layout();
> +		kvm_fixup_hyp_relocations();
> +	}
>  }
> 
>  void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
>  #include <asm/alternative.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
>  #include <asm/memory.h>
> 
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
>  	init_hyp_physvirt_offset();
>  }
> 
> +#define __load_elf_u64(s)					\
> +	({							\
> +		extern u64 s;					\
> +		u64 val;					\
> +								\
> +		asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));	\
> +		val;						\
> +	})

I'm not sure I get the rationale behind the naming here. None of this
has much to do with ELF, but seems to just load a value from a
constant pool.

> +
> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> +	return start <= (char*)addr && (char*)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> +	return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> +	       __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) 
> ||
> +	       __is_within_bounds(addr,
> +				  CHOOSE_NVHE_SYM(__per_cpu_start),
> +				  CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +
> +static void __fixup_hyp_rel(u64 addr)
> +{
> +	u64 *ptr, kern_va, hyp_va;
> +
> +	/* Adjust the relocation address taken from ELF for KASLR. */
> +	addr += kaslr_offset();
> +
> +	/* Skip addresses not in any of the hyp sections. */
> +	if (!__is_in_hyp_section(addr))
> +		return;
> +
> +	/* Get the LM alias of the relocation address. */
> +	ptr = (u64*)kvm_ksym_ref((void*)addr);

Why the casting? We should be perfectly fine without.

nit: we really need to change the name of this helper, it doesn't have
anything to do with symbols anymore. And actually, lm_alias() *is* the
right thing to use here (we don't relocate anything on VHE).

> +
> +	/*
> +	 * Read the value at the relocation address. It has already been
> +	 * relocated to the actual kernel kimg VA.
> +	 */
> +	kern_va = (u64)kvm_ksym_ref((void*)*ptr);

Same comment.

> +
> +	/* Convert to hyp VA. */
> +	hyp_va = __early_kern_hyp_va(kern_va);
> +
> +	/* Store hyp VA at the relocation address. */
> +	*ptr = __early_kern_hyp_va(kern_va);
> +}
> +
> +static void __fixup_hyp_rela(void)
> +{
> +	Elf64_Rela *rel;
> +	size_t i, n;
> +
> +	rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
> +	n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> +	for (i = 0; i < n; ++i)
> +		__fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over 
> relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids 
> the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)
> +{
> +	if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())

What do we do if CONFIG_RELOCATABLE is not selected? As far as I can tell,
bad things will happen...

I'm also worried that at this stage, the kernel is broken, until you
remove the other bits involved in runtime offsetting pointers.

Thanks,

         M.
-- 
Jazz is not dead. It just smells funny...

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ