lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4D9B6245.9000600@kernel.org>
Date:	Tue, 05 Apr 2011 11:41:09 -0700
From:	Yinghai Lu <yinghai@...nel.org>
To:	Tejun Heo <tj@...nel.org>
CC:	mingo@...hat.com, hpa@...or.com, tglx@...utronix.de,
	rientjes@...gle.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 09/14] x86-32, NUMA: Move lowmem address space reservation
 to init_alloc_remap()

On 04/04/2011 03:23 PM, Tejun Heo wrote:
> Remap alloc init is done in the following stages.
>
> 1. init_alloc_remap() calculates how much memory is necessary for each
>     node and reserves node local memory.
>
> 2. initmem_init() collects how much each node needs and reserves a
>     single contiguous lowmem area which can contain all.
>
> 3. init_remap_allocator() initializes allocator parameters from the
>     determined lowmem address and per-node offsets.
>
> 4. Actual remap happens.
>
> There is no reason for the lowmem remap area to be reserved as a
> single contiguous area at one go.  They don't interact with each other
> and the memblock allocator will put them side-by-side anyway.
>
> This patch breaks up the single lowmem address reservation and put
> per-node lowmem address reservation into init_alloc_remap() and
> initializes allocator parameters directly in the function as all the
> addresses are determined there.  This merges steps 2 and 3 into 1.
>
> While at it, remove now largely irrelevant comments in
> init_alloc_remap().
>
> This change causes the following behavior changes.
>
> * Remap lowmem areas are allocated in smaller per-node chunks.
>
> * Remap lowmem area reservation failure fail future remap allocations
>    instead of panicking.
>
> * Remap allocator initialization is less verbose.
>
> Signed-off-by: Tejun Heo<tj@...nel.org>
> Cc: Yinghai Lu<yinghai@...nel.org>
> Cc: David Rientjes<rientjes@...gle.com>
> Cc: Thomas Gleixner<tglx@...utronix.de
> Cc: Ingo Molnar<mingo@...hat.com>
> Cc: "H. Peter Anvin"<hpa@...or.com>
> ---
>   arch/x86/mm/numa_32.c |   82 +++++++++++++++----------------------------------
>   1 files changed, 25 insertions(+), 57 deletions(-)
>
> diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
> index c127543..12bb34c 100644
> --- a/arch/x86/mm/numa_32.c
> +++ b/arch/x86/mm/numa_32.c
> @@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES];
>   static void *node_remap_start_vaddr[MAX_NUMNODES];
>   void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
>
> -static unsigned long kva_start_pfn;
> -static unsigned long kva_pages;
> -
>   int __cpuinit numa_cpu_node(int cpu)
>   {
>   	return apic->x86_32_numa_cpu_node(cpu);
> @@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base)
>   static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
>   {
>   	unsigned long size;
> -	u64 node_pa;
> +	u64 node_pa, remap_pa;
> +	void *remap_va;
>
>   	/*
>   	 * The acpi/srat node info can show hot-add memroy zones where
> @@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
>   	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
>   	size = ALIGN(size, LARGE_PAGE_BYTES);
>
> +	/* allocate node memory and the lowmem remap area */
>   	node_pa = memblock_find_in_range(node_start_pfn[nid]<<  PAGE_SHIFT,
>   					 (u64)node_end_pfn[nid]<<  PAGE_SHIFT,
>   					 size, LARGE_PAGE_BYTES);
> @@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
>   			   size, nid);
>   		return 0;
>   	}
> +	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
> +
> +	remap_pa = memblock_find_in_range(min_low_pfn<<  PAGE_SHIFT,
> +					  max_low_pfn<<  PAGE_SHIFT,
> +					  size, LARGE_PAGE_BYTES);
> +	if (remap_pa == MEMBLOCK_ERROR) {
> +		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
> +			   size, nid);
> +		memblock_x86_free_range(node_pa, node_pa + size);
> +		return 0;
> +	}
> +	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
> +	remap_va = phys_to_virt(remap_pa);
>
> +	/* initialize remap allocator parameters */
> +	node_remap_start_pfn[nid] = node_pa>>  PAGE_SHIFT;
>   	node_remap_size[nid] = size>>  PAGE_SHIFT;
>   	node_remap_offset[nid] = offset;
> -	printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
> -	       size>>  PAGE_SHIFT, nid, node_pa>>  PAGE_SHIFT);
>
> -	/*
> -	 *  prevent kva address below max_low_pfn want it on system
> -	 *  with less memory later.
> -	 *  layout will be: KVA address , KVA RAM
> -	 *
> -	 *  we are supposed to only record the one less then
> -	 *  max_low_pfn but we could have some hole in high memory,
> -	 *  and it will only check page_is_ram(pfn)&&
> -	 *  !page_is_reserved_early(pfn) to decide to use it as free.
> -	 *  So memblock_x86_reserve_range here, hope we don't run out
> -	 *  of that array
> -	 */
> -	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
> +	node_remap_start_vaddr[nid] = remap_va;
> +	node_remap_end_vaddr[nid] = remap_va + size;
> +	node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE);
>
> -	node_remap_start_pfn[nid] = node_pa>>  PAGE_SHIFT;
> +	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) ->  [%p-%p)\n",
> +	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
>
>   	return size>>  PAGE_SHIFT;
>   }
>
> -static void init_remap_allocator(int nid)
> -{
> -	node_remap_start_vaddr[nid] = pfn_to_kaddr(
> -			kva_start_pfn + node_remap_offset[nid]);
> -	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
> -		(node_remap_size[nid] * PAGE_SIZE);
> -	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
> -		ALIGN(sizeof(pg_data_t), PAGE_SIZE);
> -
> -	printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
> -		(ulong) node_remap_start_vaddr[nid],
> -		(ulong) node_remap_end_vaddr[nid]);
> -}
> -
>   void __init initmem_init(void)
>   {
>   	unsigned long reserve_pages = 0;
> @@ -352,25 +341,7 @@ void __init initmem_init(void)
>
>   	for_each_online_node(nid)
>   		reserve_pages += init_alloc_remap(nid, reserve_pages);
> -	kva_pages = roundup(reserve_pages, PTRS_PER_PTE);
> -	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
> -			reserve_pages);
> -
> -	kva_start_pfn = memblock_find_in_range(min_low_pfn<<  PAGE_SHIFT,
> -				max_low_pfn<<  PAGE_SHIFT,
> -				kva_pages<<  PAGE_SHIFT,
> -				PTRS_PER_PTE<<  PAGE_SHIFT)>>  PAGE_SHIFT;
> -	if (kva_start_pfn == MEMBLOCK_ERROR)
> -		panic("Can not get kva space\n");
> -
> -	printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
> -		kva_start_pfn, max_low_pfn);
> -	printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
> -
> -	/* avoid clash with initrd */
> -	memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
> -		      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
> -		     "KVA PG");
> +
>   #ifdef CONFIG_HIGHMEM
>   	highstart_pfn = highend_pfn = max_pfn;
>   	if (max_pfn>  max_low_pfn)
> @@ -390,11 +361,8 @@ void __init initmem_init(void)
>
>   	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
>   			(ulong) pfn_to_kaddr(max_low_pfn));
> -	for_each_online_node(nid) {
> -		init_remap_allocator(nid);
> -
> +	for_each_online_node(nid)
>   		allocate_pgdat(nid);
> -	}
>   	remap_numa_kva();
>
>   	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",

Acked-by: Yinghai Lu <yinghai@...nel.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ