Message-ID: <3cb5532e-d9af-4045-99f3-9c8944672073@suse.com>
Date: Thu, 23 May 2024 14:11:04 +0200
From: Juergen Gross <jgross@...e.com>
To: Jason Andryuk <jason.andryuk@....com>,
 Boris Ostrovsky <boris.ostrovsky@...cle.com>,
 Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
 Borislav Petkov <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>,
 x86@...nel.org, "H. Peter Anvin" <hpa@...or.com>,
 Stefano Stabellini <sstabellini@...nel.org>,
 Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
 Paolo Bonzini <pbonzini@...hat.com>
Cc: xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 5/5] x86/pvh: Add 64bit relocation page tables

On 10.04.24 21:48, Jason Andryuk wrote:
> The PVH entry point is 32bit.  For a 64bit kernel, the entry point must
> switch to 64bit mode, which requires a set of page tables.  In the past,
> PVH used init_top_pgt.
> 
> This works fine when the kernel is loaded at LOAD_PHYSICAL_ADDR, as the
> page tables are prebuilt for this address.  If the kernel is loaded at a
> different address, they need to be adjusted.
> 
> __startup_64() adjusts the prebuilt page tables for the physical load
> address, but it is 64bit code.  The 32bit PVH entry code can't call it
> to adjust the page tables, so it can't readily be re-used.
> 
> 64bit PVH entry needs page tables set up for the identity map, the
> kernel high map and the direct map.  pvh_start_xen() starts out
> identity mapped.
> Inside xen_prepare_pvh(), it jumps through a pv_ops function pointer
> into the highmap.  The direct map is used for __va() on the initramfs
> and other guest physical addresses.
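
For reference, __va() on x86-64 boils down to:

	#define __va(x)	((void *)((unsigned long)(x) + PAGE_OFFSET))

so guest physical addresses like the initramfs location only become
usable through virtual addresses once the direct map entries are in
place.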
> 
> Add a dedicated set of prebuilt page tables for PVH entry.  They are
> adjusted in assembly before loading.
> 
> Add XEN_ELFNOTE_PHYS32_RELOC to indicate support for relocation
> along with the kernel's loading constraints.  The maximum load address,
> KERNEL_IMAGE_SIZE - 1, is determined by a single pvh_level2_ident_pgt
> page.  It could be larger with more pages.
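
Just to spell out the arithmetic: pvh_level2_ident_pgt is a single page
of PTRS_PER_PMD = 512 entries, each mapping a 2 MiB large page, so the
identity map covers 512 * 2 MiB = 1 GiB.  KERNEL_IMAGE_SIZE is at most
1 GiB (with RANDOMIZE_BASE), hence the KERNEL_IMAGE_SIZE - 1 limit.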
> 
> Signed-off-by: Jason Andryuk <jason.andryuk@....com>
> ---
> Instead of adding 5 pages of prebuilt page tables, they could be
> constructed dynamically in the .bss area.  They are then only used for
> PVH entry and until transitioning to init_top_pgt.  The .bss is later
> cleared.  It's safer to add the dedicated pages, so that is done here.
> ---
>   arch/x86/platform/pvh/head.S | 105 ++++++++++++++++++++++++++++++++++-
>   1 file changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index c08d08d8cc92..4af3cfbcf2f8 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -21,6 +21,8 @@
>   #include <asm/nospec-branch.h>
>   #include <xen/interface/elfnote.h>
>   
> +#include "../kernel/pgtable_64_helpers.h"
> +
>   	__HEAD
>   
>   /*
> @@ -102,8 +104,47 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
>   	btsl $_EFER_LME, %eax
>   	wrmsr
>   
> +	mov %ebp, %ebx
> +	subl $LOAD_PHYSICAL_ADDR, %ebx /* offset */
> +	jz .Lpagetable_done
> +
> +	/* Fixup page-tables for relocation. */
> +	leal rva(pvh_init_top_pgt)(%ebp), %edi
> +	movl $512, %ecx

Please use PTRS_PER_PGD instead of the literal 512. Similar issue below.
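
I.e. (assuming PTRS_PER_PGD resp. PTRS_PER_PMD are usable in this file,
they come from asm/pgtable_64_types.h):

	movl $PTRS_PER_PGD, %ecx

and the equivalent loop over pvh_level2_kernel_pgt below would use
PTRS_PER_PMD.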

> +2:
> +	testl $_PAGE_PRESENT, 0x00(%edi)
> +	jz 1f
> +	addl %ebx, 0x00(%edi)
> +1:
> +	addl $8, %edi
> +	decl %ecx
> +	jnz 2b
> +
> +	/* L3 ident has a single entry. */
> +	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
> +	addl %ebx, 0x00(%edi)
> +
> +	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
> +	addl %ebx, (4096 - 16)(%edi)
> +	addl %ebx, (4096 - 8)(%edi)

PAGE_SIZE instead of 4096, please.
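
I.e. (assuming asm/page_types.h is reachable from here):

	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)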

> +
> +	/* pvh_level2_ident_pgt is fine - large pages */
> +
> +	/* pvh_level2_kernel_pgt needs adjustment - large pages */
> +	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
> +	movl $512, %ecx
> +2:
> +	testl $_PAGE_PRESENT, 0x00(%edi)
> +	jz 1f
> +	addl %ebx, 0x00(%edi)
> +1:
> +	addl $8, %edi
> +	decl %ecx
> +	jnz 2b
> +
> +.Lpagetable_done:
>   	/* Enable pre-constructed page tables. */
> -	leal rva(init_top_pgt)(%ebp), %eax
> +	leal rva(pvh_init_top_pgt)(%ebp), %eax
>   	mov %eax, %cr3
>   	mov $(X86_CR0_PG | X86_CR0_PE), %eax
>   	mov %eax, %cr0
> @@ -197,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
>   	.fill BOOT_STACK_SIZE, 1, 0
>   SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
>   
> +#ifdef CONFIG_X86_64
> +/*
> + * Xen PVH needs a set of identity mapped and kernel high mapping
> + * page tables.  pvh_start_xen starts running on the identity mapped
> + * page tables, but xen_prepare_pvh calls into the high mapping.
> + * These page tables need to be relocatable and are only used until
> + * startup_64 transitions to init_top_pgt.
> + */
> +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
> +	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> +	.org    pvh_init_top_pgt + L4_PAGE_OFFSET*8, 0

Please add a space before and after the '*'.
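
I.e.:

	.org    pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0

and the same for the L4_START_KERNEL line below.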

> +	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> +	.org    pvh_init_top_pgt + L4_START_KERNEL*8, 0
> +	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
> +	.quad   pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
> +SYM_DATA_END(pvh_init_top_pgt)
> +
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
> +	.quad	pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> +	.fill	511, 8, 0
> +SYM_DATA_END(pvh_level3_ident_pgt)
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
> +	/*
> +	 * Since I easily can, map the first 1G.
> +	 * Don't set NX because code runs from these pages.
> +	 *
> +	 * Note: This sets _PAGE_GLOBAL regardless of whether
> +	 * the CPU supports it or it is enabled.  But the
> +	 * CPU should ignore the bit.
> +	 */
> +	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
> +SYM_DATA_END(pvh_level2_ident_pgt)
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
> +	.fill	L3_START_KERNEL,8,0

Spaces after the commas.
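
I.e.:

	.fill	L3_START_KERNEL, 8, 0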

> +	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
> +	.quad	pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> +	.quad	0 /* no fixmap */
> +SYM_DATA_END(pvh_level3_kernel_pgt)
> +
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
> +	/*
> +	 * Kernel high mapping.
> +	 *
> +	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
> +	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
> +	 * 512 MiB otherwise.
> +	 *
> +	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
> +	 *
> +	 * This table is eventually used by the kernel during normal runtime.
> +	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
> +	 * or _PAGE_GLOBAL in some cases.
> +	 */
> +	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)

Spaces around '/'.
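
I.e.:

	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)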

> +SYM_DATA_END(pvh_level2_kernel_pgt)
> +
> +	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
> +		     .long CONFIG_PHYSICAL_ALIGN;
> +		     .long LOAD_PHYSICAL_ADDR;
> +		     .long KERNEL_IMAGE_SIZE - 1)
> +#endif
> +
>   	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
>   	             _ASM_PTR (pvh_start_xen - __START_KERNEL_map))


Juergen
