Message-ID: <3cb5532e-d9af-4045-99f3-9c8944672073@suse.com>
Date: Thu, 23 May 2024 14:11:04 +0200
From: Juergen Gross <jgross@...e.com>
To: Jason Andryuk <jason.andryuk@....com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>,
x86@...nel.org, "H. Peter Anvin" <hpa@...or.com>,
Stefano Stabellini <sstabellini@...nel.org>,
Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
Paolo Bonzini <pbonzini@...hat.com>
Cc: xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 5/5] x86/pvh: Add 64bit relocation page tables
On 10.04.24 21:48, Jason Andryuk wrote:
> The PVH entry point is 32bit. For a 64bit kernel, the entry point must
> switch to 64bit mode, which requires a set of page tables. In the past,
> PVH used init_top_pgt.
>
> This works fine when the kernel is loaded at LOAD_PHYSICAL_ADDR, as the
> page tables are prebuilt for this address. If the kernel is loaded at a
> different address, they need to be adjusted.
>
> __startup_64() adjusts the prebuilt page tables for the physical load
> address, but it is 64bit code. The 32bit PVH entry code can't call it
> to adjust the page tables, so it can't readily be re-used.
>
> 64bit PVH entry needs page tables set up for the identity map, the
> kernel high map and the direct map. pvh_start_xen() enters identity
> mapped. Inside xen_prepare_pvh(), it jumps through a pv_ops function
> pointer into the high map. The direct map is used for __va() on the
> initramfs and other guest physical addresses.
>
> Add a dedicated set of prebuilt page tables for PVH entry. They are
> adjusted in assembly before loading.
>
> Add XEN_ELFNOTE_PHYS32_RELOC to indicate support for relocation
> along with the kernel's loading constraints. The maximum load address,
> KERNEL_IMAGE_SIZE - 1, is determined by a single pvh_level2_ident_pgt
> page. It could be larger with more pages.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@....com>
> ---
> Instead of adding 5 pages of prebuilt page tables, they could be
> constructed dynamically in the .bss area. They would then only be used
> during PVH entry, until transitioning to init_top_pgt. The .bss is
> later cleared. It's safer to add the dedicated pages, so that is done
> here.
> ---
> arch/x86/platform/pvh/head.S | 105 ++++++++++++++++++++++++++++++++++-
> 1 file changed, 104 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index c08d08d8cc92..4af3cfbcf2f8 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -21,6 +21,8 @@
> #include <asm/nospec-branch.h>
> #include <xen/interface/elfnote.h>
>
> +#include "../kernel/pgtable_64_helpers.h"
> +
> __HEAD
>
> /*
> @@ -102,8 +104,47 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> btsl $_EFER_LME, %eax
> wrmsr
>
> + mov %ebp, %ebx
> + subl $LOAD_PHYSICAL_ADDR, %ebx /* offset */
> + jz .Lpagetable_done
> +
> + /* Fixup page-tables for relocation. */
> + leal rva(pvh_init_top_pgt)(%ebp), %edi
> + movl $512, %ecx
Please use PTRS_PER_PGD instead of the literal 512. Similar issue below.
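I.e. something like (assuming PTRS_PER_PGD and PTRS_PER_PMD are visible
to this asm, e.g. via asm/pgtable_64_types.h):

	movl $PTRS_PER_PGD, %ecx

and PTRS_PER_PMD for the second loop.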
> +2:
> + testl $_PAGE_PRESENT, 0x00(%edi)
> + jz 1f
> + addl %ebx, 0x00(%edi)
> +1:
> + addl $8, %edi
> + decl %ecx
> + jnz 2b
> +
> + /* L3 ident has a single entry. */
> + leal rva(pvh_level3_ident_pgt)(%ebp), %edi
> + addl %ebx, 0x00(%edi)
> +
> + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
> + addl %ebx, (4096 - 16)(%edi)
> + addl %ebx, (4096 - 8)(%edi)
PAGE_SIZE instead of 4096, please.
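I.e. (assuming asm/page_types.h is pulled in here):

	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)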
> +
> + /* pvh_level2_ident_pgt is fine - large pages */
> +
> + /* pvh_level2_kernel_pgt needs adjustment - large pages */
> + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
> + movl $512, %ecx
> +2:
> + testl $_PAGE_PRESENT, 0x00(%edi)
> + jz 1f
> + addl %ebx, 0x00(%edi)
> +1:
> + addl $8, %edi
> + decl %ecx
> + jnz 2b
> +
> +.Lpagetable_done:
> /* Enable pre-constructed page tables. */
> - leal rva(init_top_pgt)(%ebp), %eax
> + leal rva(pvh_init_top_pgt)(%ebp), %eax
> mov %eax, %cr3
> mov $(X86_CR0_PG | X86_CR0_PE), %eax
> mov %eax, %cr0
> @@ -197,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
> .fill BOOT_STACK_SIZE, 1, 0
> SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
>
> +#ifdef CONFIG_X86_64
> +/*
> + * Xen PVH needs a set of identity mapped and kernel high mapping
> + * page tables. pvh_start_xen starts running on the identity mapped
> + * page tables, but xen_prepare_pvh calls into the high mapping.
> + * These page tables need to be relocatable and are only used until
> + * startup_64 transitions to init_top_pgt.
> + */
> +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
> + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> + .org pvh_init_top_pgt + L4_PAGE_OFFSET*8, 0
Please add a space before and after the '*'.
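I.e.:

	.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0

Same for the L4_START_KERNEL line below.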
> + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> + .org pvh_init_top_pgt + L4_START_KERNEL*8, 0
> + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
> + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
> +SYM_DATA_END(pvh_init_top_pgt)
> +
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
> + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> + .fill 511, 8, 0
> +SYM_DATA_END(pvh_level3_ident_pgt)
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
> + /*
> + * Since I easily can, map the first 1G.
> + * Don't set NX because code runs from these pages.
> + *
> + * Note: This sets _PAGE_GLOBAL regardless of whether
> + * the CPU supports it or has it enabled. But, the
> + * CPU should ignore the bit.
> + */
> + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
> +SYM_DATA_END(pvh_level2_ident_pgt)
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
> + .fill L3_START_KERNEL,8,0
Spaces after the commas.
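I.e.:

	.fill L3_START_KERNEL, 8, 0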
> + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
> + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
> + .quad 0 /* no fixmap */
> +SYM_DATA_END(pvh_level3_kernel_pgt)
> +
> +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
> + /*
> + * Kernel high mapping.
> + *
> + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
> + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
> + * 512 MiB otherwise.
> + *
> + * (NOTE: after that starts the module area, see MODULES_VADDR.)
> + *
> + * This table is eventually used by the kernel during normal runtime.
> + * Care must be taken to clear out undesired bits later, like _PAGE_RW
> + * or _PAGE_GLOBAL in some cases.
> + */
> + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
Spaces around '/'.
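I.e.:

	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)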
> +SYM_DATA_END(pvh_level2_kernel_pgt)
> +
> + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
> + .long CONFIG_PHYSICAL_ALIGN;
> + .long LOAD_PHYSICAL_ADDR;
> + .long KERNEL_IMAGE_SIZE - 1)
> +#endif
> +
> ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
> _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
Juergen