Message-ID: <20170728121941.GL2274@dhcp22.suse.cz>
Date:   Fri, 28 Jul 2017 14:19:41 +0200
From:   Michal Hocko <mhocko@...nel.org>
To:     Jerome Glisse <jglisse@...hat.com>
Cc:     linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
        Mel Gorman <mgorman@...e.de>, Vlastimil Babka <vbabka@...e.cz>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Reza Arbab <arbab@...ux.vnet.ibm.com>,
        Yasuaki Ishimatsu <yasu.isimatu@...il.com>,
        qiuxishi@...wei.com, Kani Toshimitsu <toshi.kani@....com>,
        slaoub@...il.com, Joonsoo Kim <js1304@...il.com>,
        Andi Kleen <ak@...ux.intel.com>,
        Daniel Kiper <daniel.kiper@...cle.com>,
        Igor Mammedov <imammedo@...hat.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        LKML <linux-kernel@...r.kernel.org>,
        Benjamin Herrenschmidt <benh@...nel.crashing.org>,
        Catalin Marinas <catalin.marinas@....com>,
        Dan Williams <dan.j.williams@...el.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Heiko Carstens <heiko.carstens@...ibm.com>,
        "H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
        Martin Schwidefsky <schwidefsky@...ibm.com>,
        Michael Ellerman <mpe@...erman.id.au>,
        Paul Mackerras <paulus@...ba.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Tony Luck <tony.luck@...el.com>,
        Will Deacon <will.deacon@....com>,
        Gerald Schaefer <gerald.schaefer@...ibm.com>
Subject: Re: [RFC PATCH 0/5] mm, memory_hotplug: allocate memmap from
 hotadded memory

On Thu 27-07-17 08:56:52, Michal Hocko wrote:
> On Wed 26-07-17 17:06:59, Jerome Glisse wrote:
> [...]
> > This does not seem to be an opt-in change, i.e. if I am reading patch 3
> > correctly: if an altmap is not provided to __add_pages() you fall back
> > to allocating from the beginning of the zone. This will not work with
> > HMM, i.e. device private memory. So at the very least I would like to
> > see some way to opt out of this. Maybe a new argument like bool
> > forbid_altmap?
> 
> OK, I see! I will think about how to make a sane api for that.

This is what I came up with. The s390 guys mentioned that I cannot simply
use the new range at this stage yet. This will probably need some other
changes, but I guess we want an opt-in approach with an arch veto in
general.

So what do you think about the following? Only x86 is updated for now and
I will split it into two parts, but the idea should be clear at least.
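
To make the intended usage concrete, here is a minimal sketch (not part
of the patch; the example_* callers are hypothetical and only mirror
what add_memory_resource() and devm_memremap_pages() do below). A
regular hotplug caller asks for memblock sysfs files via the flags,
while a ZONE_DEVICE user provides its own altmap so the memmap is never
carved out of the hotadded range:

static int example_hotplug_ram(int nid, u64 start, u64 size)
{
	struct mhp_restrictions restrictions = {};

	/* regular hotplug: create memblock sysfs files for onlining */
	restrictions.flags = MHP_MEMBLOCK_API;

	/*
	 * the arch may OR in MHP_RANGE_ACCESSIBLE (x86 does below) to
	 * allow the memmap to be allocated from the new range itself
	 */
	return arch_add_memory(nid, start, size, &restrictions);
}

static int example_hotplug_device_mem(int nid, u64 start, u64 size,
		struct vmem_altmap *altmap)
{
	struct mhp_restrictions restrictions = {};

	/*
	 * ZONE_DEVICE: pass our own altmap so that __add_pages() never
	 * falls back to allocating the memmap from the hotadded range
	 */
	restrictions.altmap = altmap;

	return arch_add_memory(nid, start, size, &restrictions);
}
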
---
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e4f749e5652f..a4a29af28bcf 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,7 +772,8 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
 	}
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size,
+		struct mhp_restrictions *restrictions)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -780,7 +781,9 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 
 	init_memory_mapping(start, start + size);
 
-	ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+	/* newly hotadded memory range is ready to be used for the memmap */
+	restrictions->flags |= MHP_RANGE_ACCESSIBLE;
+	ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f2636ad2d00f..928d93e2a555 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -129,9 +129,29 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+/*
+ * Do we want sysfs memblock files created? This will allow userspace to online
+ * and offline memory explicitly. Lack of this bit means that the caller has to
+ * call move_pfn_range_to_zone to finish the initialization.
+ */
+#define MHP_MEMBLOCK_API		(1<<0)
+
+/*
+ * Is the hotadded memory accessible directly or does it need special handling?
+ * We will try to allocate the memmap for the range from within the added
+ * memory if the bit is set.
+ */
+#define MHP_RANGE_ACCESSIBLE		(1<<1)
+
+/* Restrictions for the memory hotplug */
+struct mhp_restrictions {
+	unsigned long flags;	/* MHP_ flags */
+	struct vmem_altmap *altmap; /* use this alternative allocator for memmaps */
+};
+
 /* reasonably generic interface to expand the physical pages */
 extern int __add_pages(int nid, unsigned long start_pfn,
-	unsigned long nr_pages, bool want_memblock);
+	unsigned long nr_pages, struct mhp_restrictions *restrictions);
 
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
@@ -306,7 +326,8 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+		struct mhp_restrictions *restrictions);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index a72eb5932d2f..cf0998cfcb13 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -286,6 +286,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	int error, nid, is_ram;
+	struct mhp_restrictions restrictions = {};
 	unsigned long pfn;
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
@@ -357,8 +358,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (error)
 		goto err_pfn_remap;
 
+	/* We do not want any optional features, only our own memmap */
+	restrictions.altmap = to_vmem_altmap((unsigned long) pfn_to_page(align_start >> PAGE_SHIFT));
+
 	mem_hotplug_begin();
-	error = arch_add_memory(nid, align_start, align_size, false);
+	error = arch_add_memory(nid, align_start, align_size, &restrictions);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 					align_start >> PAGE_SHIFT,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 19037d0191e5..9d11c3b5b448 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -287,12 +287,13 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  * add the new pages.
  */
 int __ref __add_pages(int nid, unsigned long phys_start_pfn,
-			unsigned long nr_pages, bool want_memblock)
+			unsigned long nr_pages,
+			struct mhp_restrictions *restrictions)
 {
 	unsigned long i;
 	int err = 0;
 	int start_sec, end_sec;
-	struct vmem_altmap *altmap;
+	struct vmem_altmap *altmap = restrictions->altmap;
 	struct vmem_altmap __section_altmap = {.base_pfn = phys_start_pfn};
 
 	/* during initialize mem_map, align hot-added range to section */
@@ -301,10 +302,9 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 
 	/*
 	 * Check device specific altmap and fallback to allocating from the
-	 * begining of the section otherwise
+	 * beginning of the added range otherwise
 	 */
-	altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
-	if (!altmap) {
+	if (!altmap && (restrictions->flags & MHP_RANGE_ACCESSIBLE)) {
 		__section_altmap.free = nr_pages;
 		__section_altmap.flush_alloc_pfns = mark_vmemmap_pages;
 		altmap = &__section_altmap;
@@ -324,7 +324,9 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, section_nr_to_pfn(i), want_memblock, altmap);
+		err = __add_section(nid, section_nr_to_pfn(i),
+				restrictions->flags & MHP_MEMBLOCK_API,
+				altmap);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -1160,6 +1162,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	bool new_pgdat;
 	bool new_node;
 	int ret;
+	struct mhp_restrictions restrictions = {};
 
 	start = res->start;
 	size = resource_size(res);
@@ -1191,8 +1194,10 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 			goto error;
 	}
 
+	restrictions.flags = MHP_MEMBLOCK_API;
+
 	/* call arch's memory hotadd */
-	ret = arch_add_memory(nid, start, size, true);
+	ret = arch_add_memory(nid, start, size, &restrictions);
 
 	if (ret < 0)
 		goto error;
-- 
Michal Hocko
SUSE Labs
