[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d01798a0-5817-4645-8c8c-d61dcf668c25@roeck-us.net>
Date: Wed, 31 Jul 2024 15:22:53 -0700
From: Guenter Roeck <linux@...ck-us.net>
To: Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Jens Axboe <axboe@...nel.dk>, Andy Lutomirski <luto@...nel.org>,
Ingo Molnar <mingo@...hat.com>, Peter Anvin <hpa@...or.com>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
the arch/x86 maintainers <x86@...nel.org>
Subject: Re: Linux 6.11-rc1
On 7/31/24 14:20, Peter Zijlstra wrote:
> On Wed, Jul 31, 2024 at 07:26:04PM +0200, Thomas Gleixner wrote:
>> On Wed, Jul 31 2024 at 18:51, Peter Zijlstra wrote:
>>> On Wed, Jul 31, 2024 at 06:31:05PM +0200, Peter Zijlstra wrote:
>>> Thomas, this all still relies on the full text section being PMD mapped,
>>> and since we don't have ALIGN_ENTRY_TEXT_END and _etext has PAGE_SIZE
>>> alignment, can't we have a PAGE mapped tail which then doesn't get cloned?
>>>
>>> Do we want to make pti_clone_entry_text() use PTI_LEVEL_KERNEL_IMAGE
>>> such that it will clone whatever it has?
>>
>> Yes, I think so.
>
> The alternative is ripping that level thing out entirely, and simply
> duplicating anything we find in the page-tables.
>
The patch below (on top of the previous one, because otherwise it doesn't
apply) causes qemu to bail out hard, with
...
[ 3.658327] sr 2:0:0:0: Attached scsi generic sg0 type 5
[ 3.858040] sched_clock: Marking stable (3834034034, 23728553)->(3865222956, -7460369)
[ 3.861469] registered taskstats version 1
[ 3.861584] Loading compiled-in X.509 certificates
[ 4.082031] Btrfs loaded, zoned=no, fsverity=no
[ 4.096034] cryptomgr_test (69) used greatest stack depth: 6136 bytes left
No backtrace or other message, it just exits immediately.
Guenter
> We could add something like:
>
> WARN_ON_ONCE(IS_ENABLED(CONFIG_X86_64));
>
> in the PTE path, but do we really care?
>
> ---
> --- a/arch/x86/mm/pti.c
> +++ b/arch/x86/mm/pti.c
> @@ -47,16 +47,6 @@
> #define __GFP_NOTRACK 0
> #endif
>
> -/*
> - * Define the page-table levels we clone for user-space on 32
> - * and 64 bit.
> - */
> -#ifdef CONFIG_X86_64
> -#define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PMD
> -#else
> -#define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PTE
> -#endif
> -
> static void __init pti_print_if_insecure(const char *reason)
> {
> if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
> @@ -294,14 +284,7 @@ static void __init pti_setup_vsyscall(vo
> static void __init pti_setup_vsyscall(void) { }
> #endif
>
> -enum pti_clone_level {
> - PTI_CLONE_PMD,
> - PTI_CLONE_PTE,
> -};
> -
> -static void
> -pti_clone_pgtable(unsigned long start, unsigned long end,
> - enum pti_clone_level level)
> +static void pti_clone_pgtable(unsigned long start, unsigned long end)
> {
> unsigned long addr;
>
> @@ -341,7 +324,7 @@ pti_clone_pgtable(unsigned long start, u
> continue;
> }
>
> - if (pmd_leaf(*pmd) || level == PTI_CLONE_PMD) {
> + if (pmd_leaf(*pmd)) {
> target_pmd = pti_user_pagetable_walk_pmd(addr);
> if (WARN_ON(!target_pmd))
> return;
> @@ -375,37 +358,33 @@ pti_clone_pgtable(unsigned long start, u
> *target_pmd = *pmd;
>
> addr = round_up(addr + 1, PMD_SIZE);
> + continue;
> + }
>
> - } else if (level == PTI_CLONE_PTE) {
> -
> - /* Walk the page-table down to the pte level */
> - pte = pte_offset_kernel(pmd, addr);
> - if (pte_none(*pte)) {
> - addr = round_up(addr + 1, PAGE_SIZE);
> - continue;
> - }
> -
> - /* Only clone present PTEs */
> - if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
> - return;
> + /* Walk the page-table down to the pte level */
> + pte = pte_offset_kernel(pmd, addr);
> + if (pte_none(*pte)) {
> + addr = round_up(addr + 1, PAGE_SIZE);
> + continue;
> + }
>
> - /* Allocate PTE in the user page-table */
> - target_pte = pti_user_pagetable_walk_pte(addr);
> - if (WARN_ON(!target_pte))
> - return;
> + /* Only clone present PTEs */
> + if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
> + return;
>
> - /* Set GLOBAL bit in both PTEs */
> - if (boot_cpu_has(X86_FEATURE_PGE))
> - *pte = pte_set_flags(*pte, _PAGE_GLOBAL);
> + /* Allocate PTE in the user page-table */
> + target_pte = pti_user_pagetable_walk_pte(addr);
> + if (WARN_ON(!target_pte))
> + return;
>
> - /* Clone the PTE */
> - *target_pte = *pte;
> + /* Set GLOBAL bit in both PTEs */
> + if (boot_cpu_has(X86_FEATURE_PGE))
> + *pte = pte_set_flags(*pte, _PAGE_GLOBAL);
>
> - addr = round_up(addr + 1, PAGE_SIZE);
> + /* Clone the PTE */
> + *target_pte = *pte;
>
> - } else {
> - BUG();
> - }
> + addr = round_up(addr + 1, PAGE_SIZE);
> }
> }
>
> @@ -475,7 +454,7 @@ static void __init pti_clone_user_shared
> start = CPU_ENTRY_AREA_BASE;
> end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
>
> - pti_clone_pgtable(start, end, PTI_CLONE_PMD);
> + pti_clone_pgtable(start, end);
> }
> #endif /* CONFIG_X86_64 */
>
> @@ -495,8 +474,7 @@ static void __init pti_setup_espfix64(vo
> static void pti_clone_entry_text(void)
> {
> pti_clone_pgtable((unsigned long) __entry_text_start,
> - (unsigned long) __entry_text_end,
> - PTI_CLONE_PMD);
> + (unsigned long) __entry_text_end);
> }
>
> /*
> @@ -571,7 +549,7 @@ static void pti_clone_kernel_text(void)
> * pti_set_kernel_image_nonglobal() did to clear the
> * global bit.
> */
> - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
> + pti_clone_pgtable(start, end_clone);
>
> /*
> * pti_clone_pgtable() will set the global bit in any PMDs
Powered by blists - more mailing lists