Message-Id: <1551011649-30103-3-git-send-email-kernelfans@gmail.com>
Date:   Sun, 24 Feb 2019 20:34:05 +0800
From:   Pingfan Liu <kernelfans@...il.com>
To:     x86@...nel.org, linux-mm@...ck.org
Cc:     Pingfan Liu <kernelfans@...il.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Mike Rapoport <rppt@...ux.vnet.ibm.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mel Gorman <mgorman@...e.de>,
        Joonsoo Kim <iamjoonsoo.kim@....com>,
        Andy Lutomirski <luto@...nel.org>,
        Andi Kleen <ak@...ux.intel.com>,
        Petr Tesarik <ptesarik@...e.cz>,
        Michal Hocko <mhocko@...e.com>,
        Stephen Rothwell <sfr@...b.auug.org.au>,
        Jonathan Corbet <corbet@....net>,
        Nicholas Piggin <npiggin@...il.com>,
        Daniel Vacek <neelx@...hat.com>, linux-kernel@...r.kernel.org
Subject: [PATCH 2/6] mm/memblock: make full utilization of numa info

There are NUMA machines with memory-less nodes. When allocating memory for
a memory-less node, the memblock allocator falls back to node 0 instead of
the nearest node that has memory. This hurts performance, especially for
the percpu sections. Fix this by building full node-fallback information
for the memblock allocator, as is already done for the page allocator.
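
As an illustration, here is a minimal sketch of the ordering that
build_node_order() (declared below, defined elsewhere in this series) is
expected to produce: the remaining online nodes, nearest first by
node_distance(). The selection heuristic shown here is an assumption of
the sketch, not part of this patch:

	/*
	 * Sketch only: fill node_order_array[] with up to sz online nodes,
	 * nearest to local_node first. Nodes already set in *used_mask are
	 * skipped; the caller pre-sets local_node, so the array ends up
	 * holding fallback candidates only. Returns the number of entries.
	 */
	static int __init sketch_build_node_order(int *node_order_array, int sz,
			int local_node, nodemask_t *used_mask)
	{
		int count = 0;

		while (count < sz) {
			int nid, best = NUMA_NO_NODE, best_dist = INT_MAX;

			for_each_online_node(nid) {
				if (node_isset(nid, *used_mask))
					continue;
				if (node_distance(local_node, nid) < best_dist) {
					best_dist = node_distance(local_node, nid);
					best = nid;
				}
			}
			if (best == NUMA_NO_NODE)
				break;
			node_set(best, *used_mask);
			node_order_array[count++] = best;
		}
		return count;
	}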

Signed-off-by: Pingfan Liu <kernelfans@...il.com>
CC: Thomas Gleixner <tglx@...utronix.de>
CC: Ingo Molnar <mingo@...hat.com>
CC: Borislav Petkov <bp@...en8.de>
CC: "H. Peter Anvin" <hpa@...or.com>
CC: Dave Hansen <dave.hansen@...ux.intel.com>
CC: Vlastimil Babka <vbabka@...e.cz>
CC: Mike Rapoport <rppt@...ux.vnet.ibm.com>
CC: Andrew Morton <akpm@...ux-foundation.org>
CC: Mel Gorman <mgorman@...e.de>
CC: Joonsoo Kim <iamjoonsoo.kim@....com>
CC: Andy Lutomirski <luto@...nel.org>
CC: Andi Kleen <ak@...ux.intel.com>
CC: Petr Tesarik <ptesarik@...e.cz>
CC: Michal Hocko <mhocko@...e.com>
CC: Stephen Rothwell <sfr@...b.auug.org.au>
CC: Jonathan Corbet <corbet@....net>
CC: Nicholas Piggin <npiggin@...il.com>
CC: Daniel Vacek <neelx@...hat.com>
CC: linux-kernel@...r.kernel.org
---
 include/linux/memblock.h |  3 +++
 mm/memblock.c            | 68 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 66 insertions(+), 5 deletions(-)
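
Note (illustrative, not part of the patch): memblock_build_node_order()
must run after the arch has initialized the per-node cpumasks, since
build_node_order() depends on cpumask_of_node(); the table is later torn
down by memblock_free_all() via memblock_free_node_order(). A hypothetical
call site, with an invented hook name, could look like:

	/* hypothetical arch hook; name and placement are assumptions */
	static void __init arch_numa_setup_done(void)
	{
		/* cpumask_of_node() must be valid from here on */
		memblock_build_node_order();
	}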

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 64c41cf..ee999c5 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -342,6 +342,9 @@ void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align,
 void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
 			     phys_addr_t min_addr, phys_addr_t max_addr,
 			     int nid);
+extern int build_node_order(int *node_order_array, int sz,
+	int local_node, nodemask_t *used_mask);
+void memblock_build_node_order(void);
 
 static inline void * __init memblock_alloc(phys_addr_t size,  phys_addr_t align)
 {
diff --git a/mm/memblock.c b/mm/memblock.c
index 022d4cb..cf78850 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1338,6 +1338,47 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
+static int **node_fallback __initdata;
+
+/*
+ * build_node_order() relies on cpumask_of_node(), so the arch must set up
+ * the per-node cpumasks before calling this function.
+ */
+void __init memblock_build_node_order(void)
+{
+	int nid, i;
+	nodemask_t used_mask;
+
+	node_fallback = memblock_alloc(MAX_NUMNODES * sizeof(int *),
+		sizeof(int *));
+	for_each_online_node(nid) {
+		node_fallback[nid] = memblock_alloc(
+			num_online_nodes() * sizeof(int), sizeof(int));
+		for (i = 0; i < num_online_nodes(); i++)
+			node_fallback[nid][i] = NUMA_NO_NODE;
+	}
+
+	for_each_online_node(nid) {
+		nodes_clear(used_mask);
+		node_set(nid, used_mask);
+		build_node_order(node_fallback[nid], num_online_nodes(),
+			nid, &used_mask);
+	}
+}
+
+static void __init memblock_free_node_order(void)
+{
+	int nid;
+
+	if (!node_fallback)
+		return;
+	for_each_online_node(nid)
+		memblock_free(__pa(node_fallback[nid]),
+			num_online_nodes() * sizeof(int));
+	memblock_free(__pa(node_fallback), MAX_NUMNODES * sizeof(int *));
+	node_fallback = NULL;
+}
+
 /**
  * memblock_alloc_internal - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
@@ -1370,6 +1411,7 @@ static void * __init memblock_alloc_internal(
 {
 	phys_addr_t alloc;
 	void *ptr;
+	int node;
 	enum memblock_flags flags = choose_memblock_flags();
 
 	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
@@ -1397,11 +1439,26 @@ static void * __init memblock_alloc_internal(
 		goto done;
 
 	if (nid != NUMA_NO_NODE) {
-		alloc = memblock_find_in_range_node(size, align, min_addr,
-						    max_addr, NUMA_NO_NODE,
-						    flags);
-		if (alloc && !memblock_reserve(alloc, size))
-			goto done;
+		if (!node_fallback) {
+			alloc = memblock_find_in_range_node(size, align,
+					min_addr, max_addr,
+					NUMA_NO_NODE, flags);
+			if (alloc && !memblock_reserve(alloc, size))
+				goto done;
+		} else {
+			int i;
+			for (i = 0; i < num_online_nodes(); i++) {
+				node = node_fallback[nid][i];
+				/* the list covers all memory nodes; NUMA_NO_NODE marks its end */
+				if (node == NUMA_NO_NODE)
+					break;
+				alloc = memblock_find_in_range_node(size,
+						align, min_addr, max_addr,
+						node, flags);
+				if (alloc && !memblock_reserve(alloc, size))
+					goto done;
+			}
+		}
 	}
 
 	if (min_addr) {
@@ -1969,6 +2026,7 @@ unsigned long __init memblock_free_all(void)
 
 	reset_all_zones_managed_pages();
 
+	memblock_free_node_order();
 	pages = free_low_memory_core_early();
 	totalram_pages_add(pages);
 
-- 
2.7.4
