lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Thu, 24 Feb 2022 08:26:59 +0200
From:   Mike Rapoport <rppt@...nel.org>
To:     Vijay Balakrishna <vijayb@...ux.microsoft.com>
Cc:     Catalin Marinas <catalin.marinas@....com>,
        Will Deacon <will@...nel.org>,
        Nicolas Saenz Julienne <nsaenz@...nel.org>,
        Anshuman Khandual <anshuman.khandual@....com>,
        Ard Biesheuvel <ardb@...nel.org>,
        Pavel Tatashin <pasha.tatashin@...een.com>,
        linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH v2] arm64: Do not defer reserve_crashkernel() for
 platforms with no DMA memory zones

On Wed, Feb 23, 2022 at 11:57:33AM -0800, Vijay Balakrishna wrote:
> The following patches resulted in deferring crash kernel reservation to
> mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
> in particular Raspberry Pi 4.
> 
> commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
> commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
> commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
> commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")
> 
> The above changes introduced a boot slowdown due to linear map creation for
> all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1].  The proposed
> changes restore crash kernel reservation to its earlier behavior, thus avoiding
> the slow boot, particularly for platforms with IOMMU (no DMA memory zones).
> 
> Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU
> and 8GB memory.  Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm
> no regression to deferring scheme of crash kernel memory reservation.
> In both cases successfully collected kernel crash dump.
> 
> [1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.microsoft.com/
> 
> Signed-off-by: Vijay Balakrishna <vijayb@...ux.microsoft.com>
> Cc: stable@...r.kernel.org
> ---
> Changes from v1 -> v2
> ---------------------
> - replaced '!crashk_res.end' with IS_ENABLED(ZONE_DMA/DMA32) (Nicolas's comment)
> - minor change to make it uniform -- replaced #if defined(..) -> #if IS_ENABLED(..)
> - added new comment in arch/arm64/mm/init.c to ease future maintenance (Nicolas's comment)
> - test performed comment moved to commit message
> 
> [v1] https://lore.kernel.org/all/1645056294-6509-1-git-send-email-vijayb@linux.microsoft.com/
> ---
>  arch/arm64/mm/init.c | 36 +++++++++++++++++++++++++++++++++---
>  arch/arm64/mm/mmu.c  | 29 ++++++++++++++++++++++++++++-
>  2 files changed, 61 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index db63cc885771..51869f9dfc33 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
>   * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
>   * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
>   * otherwise it is empty.
> + *
> + * Memory reservation for crash kernel is either done early or deferred
> + * depending on DMA memory zones configs (ZONE_DMA) --
> + *
> + * In the absence of ZONE_DMA configs, arm64_dma_phys_limit is initialized
> + * here instead of max_zone_phys().  This lets early reservation of
> + * crash kernel memory which has a dependency on arm64_dma_phys_limit.
> + * Reserving memory early for crash kernel allows linear creation of block
> + * mappings (greater than page-granularity) for all the memory bank ranges.
> + * In this scheme a comparatively quicker boot is observed.
> + *
> + * If ZONE_DMA configs are defined, crash kernel memory reservation
> + * is delayed until DMA zone memory range size initialization is performed in
> + * zone_sizes_init().  The defer is necessary to steer clear of DMA zone
> + * memory range to avoid overlap allocation.  So crash kernel memory boundaries
> + * are not known when mapping all bank memory ranges, which otherwise means
> + * not possible to exclude crash kernel range from creating block mappings
> + * so page-granularity mappings are created for the entire memory range.
> + * Hence a slightly slower boot is observed.
> + *
> + * Note: Page-granularity mappings are necessary for crash kernel memory
> + * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
>   */
> -phys_addr_t arm64_dma_phys_limit __ro_after_init;
> +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
> +phys_addr_t __ro_after_init arm64_dma_phys_limit;
> +#else
> +phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
> +#endif
>  
>  #ifdef CONFIG_KEXEC_CORE
>  /*
> @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>  	if (!arm64_dma_phys_limit)
>  		arm64_dma_phys_limit = dma32_phys_limit;
>  #endif
> -	if (!arm64_dma_phys_limit)
> -		arm64_dma_phys_limit = PHYS_MASK + 1;
>  	max_zone_pfns[ZONE_NORMAL] = max;
>  
>  	free_area_init(max_zone_pfns);
> @@ -315,6 +339,10 @@ void __init arm64_memblock_init(void)
>  
>  	early_init_fdt_scan_reserved_mem();
>  
> +#if !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)
> +	reserve_crashkernel();
> +#endif

Nit: with IS_ENABLED() this does not need to be an #ifdef, but rather

	if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
		reserve_crashkernel();

The same applies to the cases below.

> +
>  	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
>  }
>  
> @@ -357,11 +385,13 @@ void __init bootmem_init(void)
>  	 */
>  	dma_contiguous_reserve(arm64_dma_phys_limit);
>  
> +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
>  	/*
>  	 * request_standard_resources() depends on crashkernel's memory being
>  	 * reserved, so do it here.
>  	 */
>  	reserve_crashkernel();
> +#endif
>  
>  	memblock_dump_all();
>  }
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index acfae9b41cc8..884b2c6d6cd9 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
>  	 */
>  	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
>  
> -	if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
> +	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
>  		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
>  
>  	/*
> @@ -528,6 +528,18 @@ static void __init map_mem(pgd_t *pgdp)
>  	 */
>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
>  
> +#if IS_ENABLED(CONFIG_KEXEC_CORE)
> +
> +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
> +	if (crash_mem_map)
> +		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> +#else
> +	if (crashk_res.end)
> +		memblock_mark_nomap(crashk_res.start,
> +				    resource_size(&crashk_res));
> +#endif
> +
> +#endif
>  	/* map all the memory banks */
>  	for_each_mem_range(i, &start, &end) {
>  		if (start >= end)
> @@ -554,6 +566,21 @@ static void __init map_mem(pgd_t *pgdp)
>  	__map_memblock(pgdp, kernel_start, kernel_end,
>  		       PAGE_KERNEL, NO_CONT_MAPPINGS);
>  	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
> +#if IS_ENABLED(CONFIG_KEXEC_CORE) && \
> +    !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)
> +	/*
> +	 * Use page-level mappings here so that we can shrink the region
> +	 * in page granularity and put back unused memory to buddy system
> +	 * through /sys/kernel/kexec_crash_size interface.
> +	 */
> +	if (crashk_res.end) {
> +		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
> +			       PAGE_KERNEL,
> +			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
> +		memblock_clear_nomap(crashk_res.start,
> +				     resource_size(&crashk_res));
> +	}
> +#endif
>  }
>  
>  void mark_rodata_ro(void)
> -- 
> 2.35.1
> 

-- 
Sincerely yours,
Mike.

Powered by blists - more mailing lists