Message-ID: <CAMzpN2h6S69bOLXCUhmkVJErvoKxq-wPmfoaqR7eGHYLgirn+Q@mail.gmail.com>
Date: Wed, 10 Apr 2024 17:00:10 -0400
From: Brian Gerst <brgerst@...il.com>
To: Jason Andryuk <jason.andryuk@....com>
Cc: Juergen Gross <jgross@...e.com>, Boris Ostrovsky <boris.ostrovsky@...cle.com>, 
	Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>, 
	Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org, 
	"H. Peter Anvin" <hpa@...or.com>, Stefano Stabellini <sstabellini@...nel.org>, 
	Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>, Paolo Bonzini <pbonzini@...hat.com>, 
	xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64

On Wed, Apr 10, 2024 at 3:50 PM Jason Andryuk <jason.andryuk@....com> wrote:
>
> The PVH entrypoint is 32bit non-PIC code running from the uncompressed
> vmlinux load address, CONFIG_PHYSICAL_START (default 0x1000000, 16MB).
> The kernel is loaded at that physical address inside the VM by the VMM
> software (Xen/QEMU).
>
> When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1
> into the PVH container.  There exist system firmwares (Coreboot/EDK2)
> with reserved memory at 16MB.  This creates a conflict where the PVH
> kernel cannot be loaded at that address.
>
> Modify the PVH entrypoint to be position-independent to allow flexibility
> in the load address.  Only the 64bit entry path is converted.  A 32bit
> kernel is not PIC, so calls into other parts of the kernel, like
> xen_prepare_pvh() and mk_pgtable_32(), don't work properly when
> relocated.
>
> This makes the code PIC, but the page tables need to be updated as well
> to handle running from the kernel high map.
>
> The UNWIND_HINT_END_OF_STACK is to silence:
> vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction
> after the lret into 64bit code.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@....com>
> ---
> ---
>  arch/x86/platform/pvh/head.S | 44 ++++++++++++++++++++++++++++--------
>  1 file changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index f7235ef87bc3..bb1e582e32b1 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -7,6 +7,7 @@
>         .code32
>         .text
>  #define _pa(x)          ((x) - __START_KERNEL_map)
> +#define rva(x)          ((x) - pvh_start_xen)
>
>  #include <linux/elfnote.h>
>  #include <linux/init.h>
> @@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
>         UNWIND_HINT_END_OF_STACK
>         cld
>
> -       lgdt (_pa(gdt))
> +       /*
> +        * See the comment for startup_32 for more details.  We need to
> +        * execute a call to get the execution address to be position
> +        * independent, but we don't have a stack.  Save and restore the
> +        * magic field of start_info in ebx, and use that as the stack.
> +        */
> +       mov  (%ebx), %eax
> +       leal 4(%ebx), %esp
> +       ANNOTATE_INTRA_FUNCTION_CALL
> +       call 1f
> +1:     popl %ebp
> +       mov  %eax, (%ebx)
> +       subl $rva(1b), %ebp
> +       movl $0, %esp
> +
> +       leal rva(gdt)(%ebp), %eax
> +       leal rva(gdt_start)(%ebp), %ecx
> +       movl %ecx, 2(%eax)
> +       lgdt (%eax)
>
>         mov $PVH_DS_SEL,%eax
>         mov %eax,%ds
> @@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
>         mov %eax,%ss
>
>         /* Stash hvm_start_info. */
> -       mov $_pa(pvh_start_info), %edi
> +       leal rva(pvh_start_info)(%ebp), %edi
>         mov %ebx, %esi
> -       mov _pa(pvh_start_info_sz), %ecx
> +       movl rva(pvh_start_info_sz)(%ebp), %ecx
>         shr $2,%ecx
>         rep
>         movsl
>
> -       mov $_pa(early_stack_end), %esp
> +       leal rva(early_stack_end)(%ebp), %esp
>
>         /* Enable PAE mode. */
>         mov %cr4, %eax
> @@ -84,28 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
>         wrmsr
>
>         /* Enable pre-constructed page tables. */
> -       mov $_pa(init_top_pgt), %eax
> +       leal rva(init_top_pgt)(%ebp), %eax
>         mov %eax, %cr3
>         mov $(X86_CR0_PG | X86_CR0_PE), %eax
>         mov %eax, %cr0
>
>         /* Jump to 64-bit mode. */
> -       ljmp $PVH_CS_SEL, $_pa(1f)
> +       pushl $PVH_CS_SEL
> +       leal  rva(1f)(%ebp), %eax
> +       pushl %eax
> +       lretl
>
>         /* 64-bit entry point. */
>         .code64
>  1:
> +       UNWIND_HINT_END_OF_STACK
> +
>         /* Set base address in stack canary descriptor. */
>         mov $MSR_GS_BASE,%ecx
> -       mov $_pa(canary), %eax
> +       leal rva(canary)(%ebp), %eax

Since this is in 64-bit mode, RIP-relative addressing can be used.
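Something like this, as a rough, untested sketch:

	/* Set base address in stack canary descriptor. */
	mov  $MSR_GS_BASE, %ecx
	lea  canary(%rip), %eax

The assembler emits a displacement relative to the next instruction, so
the address resolves correctly wherever the image is loaded, without
needing the load offset kept in %ebp.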

>         xor %edx, %edx
>         wrmsr
>
>         call xen_prepare_pvh
>
>         /* startup_64 expects boot_params in %rsi. */
> -       mov $_pa(pvh_bootparams), %rsi
> -       mov $_pa(startup_64), %rax
> +       lea rva(pvh_bootparams)(%ebp), %rsi
> +       lea rva(startup_64)(%ebp), %rax

RIP-relative here too.
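A sketch of the same idea (again untested):

	/* startup_64 expects boot_params in %rsi. */
	lea  pvh_bootparams(%rip), %rsi
	lea  startup_64(%rip), %rax

Both symbols live in the same image as this code, so %rip-relative
references pick up their run-time addresses directly.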

>         ANNOTATE_RETPOLINE_SAFE
>         jmp *%rax
>
> @@ -143,7 +167,7 @@ SYM_CODE_END(pvh_start_xen)
>         .balign 8
>  SYM_DATA_START_LOCAL(gdt)
>         .word gdt_end - gdt_start
> -       .long _pa(gdt_start)
> +       .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
>         .word 0
>  SYM_DATA_END(gdt)
>  SYM_DATA_START_LOCAL(gdt_start)
> --
> 2.44.0
>
>

Brian Gerst
