lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 18 Apr 2023 11:22:25 +0100
From:   Ryan Roberts <ryan.roberts@....com>
To:     Ard Biesheuvel <ardb@...nel.org>, linux-kernel@...r.kernel.org
Cc:     linux-arm-kernel@...ts.infradead.org,
        Catalin Marinas <catalin.marinas@....com>,
        Will Deacon <will@...nel.org>, Marc Zyngier <maz@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        Anshuman Khandual <anshuman.khandual@....com>,
        Kees Cook <keescook@...omium.org>
Subject: Re: [PATCH v3 35/60] arm64: mm: Use 48-bit virtual addressing for the
 permanent ID map

On 07/03/2023 14:04, Ard Biesheuvel wrote:
> Even though we support loading kernels anywhere in 48-bit addressable
> physical memory, we create the ID maps based on the number of levels
> that we happened to configure for the kernel VA and user VA spaces.
> 
> The reason for this is that the PGD/PUD/PMD based classification of
> translation levels, along with the associated folding when the number of
> levels is less than 5, does not permit creating a page table hierarchy
> of a set number of levels. This means that, for instance, on 39-bit VA
> kernels we need to configure an additional level above PGD level on the
> fly, and 36-bit VA kernels still only support 47-bit virtual addressing
> with this trick applied.
> 
> Now that we have a separate helper to populate page table hierarchies
> that does not define the levels in terms of PUDS/PMDS/etc at all, let's
> reuse it to create the permanent ID map with a fixed VA size of 48 bits.
> 
> Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
> ---
>  arch/arm64/include/asm/kernel-pgtable.h |  2 ++
>  arch/arm64/kernel/head.S                |  5 +++
>  arch/arm64/kvm/mmu.c                    | 15 +++------
>  arch/arm64/mm/mmu.c                     | 32 +++++++++++---------
>  arch/arm64/mm/proc.S                    |  9 ++----
>  5 files changed, 31 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
> index 50b5c145358a5d8e..2a2c80ffe59e5307 100644
> --- a/arch/arm64/include/asm/kernel-pgtable.h
> +++ b/arch/arm64/include/asm/kernel-pgtable.h
> @@ -35,6 +35,8 @@
>  #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
>  #endif
>  
> +#define IDMAP_LEVELS		ARM64_HW_PGTABLE_LEVELS(48)
> +#define IDMAP_ROOT_LEVEL	(4 - IDMAP_LEVELS)
>  
>  /*
>   * If KASLR is enabled, then an offset K is added to the kernel address
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index e45fd99e8ab4272a..fc6a4076d826b728 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -727,6 +727,11 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
>  SYM_FUNC_END(__no_granule_support)
>  
>  SYM_FUNC_START_LOCAL(__primary_switch)
> +	mrs		x1, tcr_el1
> +	mov		x2, #64 - VA_BITS
> +	tcr_set_t0sz	x1, x2
> +	msr		tcr_el1, x1
> +
>  	adrp	x1, reserved_pg_dir
>  	adrp	x2, init_idmap_pg_dir
>  	bl	__enable_mmu
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 7113587222ffe8e1..d64be7b5f6692e8b 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1687,16 +1687,9 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
>  	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
>  
>  	/*
> -	 * The ID map may be configured to use an extended virtual address
> -	 * range. This is only the case if system RAM is out of range for the
> -	 * currently configured page size and VA_BITS_MIN, in which case we will
> -	 * also need the extended virtual range for the HYP ID map, or we won't
> -	 * be able to enable the EL2 MMU.
> -	 *
> -	 * However, in some cases the ID map may be configured for fewer than
> -	 * the number of VA bits used by the regular kernel stage 1. This
> -	 * happens when VA_BITS=52 and the kernel image is placed in PA space
> -	 * below 48 bits.
> +	 * The ID map is always configured for 48 bits of translation, which
> +	 * may be fewer than the number of VA bits used by the regular kernel
> +	 * stage 1, when VA_BITS=52.
>  	 *
>  	 * At EL2, there is only one TTBR register, and we can't switch between
>  	 * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
> @@ -1707,7 +1700,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
>  	 * 1 VA bits to assure that the hypervisor can both ID map its code page
>  	 * and map any kernel memory.
>  	 */
> -	idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
> +	idmap_bits = 48;
>  	kernel_bits = vabits_actual;
>  	*hyp_va_bits = max(idmap_bits, kernel_bits);

This effectively means that the hypervisor always uses at least 48 VA bits.
Previously, I think it would have been 39 for (e.g.) Android builds? Does this
have any performance implications for pKVM?

>  
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 81e1420d2cc13246..a59433ae4f5f8d02 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -762,22 +762,21 @@ static void __init map_kernel(pgd_t *pgdp)
>  	kasan_copy_shadow(pgdp);
>  }
>  
> +void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
> +		    int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
> +
> +static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
> +	  kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;

I see this new storage introduced, but I don't see you removing the storage for
the old method (I have a vague memory of it being defined in the linker script)?

> +
>  static void __init create_idmap(void)
>  {
>  	u64 start = __pa_symbol(__idmap_text_start);
> -	u64 size = __pa_symbol(__idmap_text_end) - start;
> -	pgd_t *pgd = idmap_pg_dir;
> -	u64 pgd_phys;
> -
> -	/* check if we need an additional level of translation */
> -	if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
> -		pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
> -		set_pgd(&idmap_pg_dir[start >> VA_BITS],
> -			__pgd(pgd_phys | P4D_TYPE_TABLE));
> -		pgd = __va(pgd_phys);
> -	}
> -	__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
> -			     early_pgtable_alloc, 0);
> +	u64 end   = __pa_symbol(__idmap_text_end);
> +	u64 ptep  = __pa_symbol(idmap_ptes);
> +
> +	__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
> +		       IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
> +		       __phys_to_virt(ptep) - ptep);
>  
>  	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
>  		extern u32 __idmap_kpti_flag;
> @@ -787,8 +786,10 @@ static void __init create_idmap(void)
>  		 * The KPTI G-to-nG conversion code needs a read-write mapping
>  		 * of its synchronization flag in the ID map.
>  		 */
> -		__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
> -				     early_pgtable_alloc, 0);
> +		ptep = __pa_symbol(kpti_ptes);
> +		__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
> +			       IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
> +			       __phys_to_virt(ptep) - ptep);
>  	}
>  }
>  
> @@ -813,6 +814,7 @@ void __init paging_init(void)
>  	memblock_allow_resize();
>  
>  	create_idmap();
> +	idmap_t0sz = TCR_T0SZ(48);
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 82e88f4521737c0e..c7129b21bfd5191f 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -422,9 +422,9 @@ SYM_FUNC_START(__cpu_setup)
>  	mair	.req	x17
>  	tcr	.req	x16
>  	mov_q	mair, MAIR_EL1_SET
> -	mov_q	tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
> -			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
> -			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
> +	mov_q	tcr, TCR_T0SZ(48) | TCR_T1SZ(VA_BITS) | TCR_CACHE_FLAGS | \
> +		     TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
> +		     TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS

You're hardcoding 48 in 3 places in this patch. I wonder if an IDMAP_VA_BITS
macro might help here?

>  
>  	tcr_clear_errata_bits tcr, x9, x5
>  
> @@ -432,10 +432,7 @@ SYM_FUNC_START(__cpu_setup)
>  	sub		x9, xzr, x0
>  	add		x9, x9, #64
>  	tcr_set_t1sz	tcr, x9
> -#else
> -	idmap_get_t0sz	x9
>  #endif
> -	tcr_set_t0sz	tcr, x9
>  
>  	/*
>  	 * Set the IPS bits in TCR_EL1.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ