lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231016233215.13090-11-madvenka@linux.microsoft.com>
Date:   Mon, 16 Oct 2023 18:32:15 -0500
From:   madvenka@...ux.microsoft.com
To:     gregkh@...uxfoundation.org, pbonzini@...hat.com, rppt@...nel.org,
        jgowans@...zon.com, graf@...zon.de, arnd@...db.de,
        keescook@...omium.org, stanislav.kinsburskii@...il.com,
        anthony.yznaga@...cle.com, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, madvenka@...ux.microsoft.com,
        jamorris@...ux.microsoft.com
Subject: [RFC PATCH v1 10/10] mm/prmem: Implement dynamic expansion of prmem.

From: "Madhavan T. Venkataraman" <madvenka@...ux.microsoft.com>

For some use cases, it is hard to predict how much actual memory is
needed to store persistent data. This will depend on the workload. Either
we would have to overcommit memory for persistent data. Or, we could
allow dynamic expansion of prmem memory.

Implement dynamic expansion of prmem. When the allocator runs out of memory
it calls alloc_pages(MAX_ORDER) to allocate a max order page. It creates a
region for that memory and adds it to the list of regions. Then, the
allocator can allocate from that region.

To allow this, extend the command line parameter:

	prmem=size[KMG][,max_size[KMG]]

Size is allocated upfront as mentioned before. Between size and max_size,
prmem is expanded dynamically as mentioned above.

Choosing a max order page means that no fragmentation is created for
transparent huge pages and kmem slabs. But fragmentation may be created
for 1GB pages. This is not a problem for 1GB pages that are reserved
up front. This could be a problem for 1GB pages that are allocated at
run time dynamically.

If max_size is omitted from the command line parameter, no dynamic
expansion will happen.

Signed-off-by: Madhavan T. Venkataraman <madvenka@...ux.microsoft.com>
---
 include/linux/prmem.h          |  8 +++++++
 kernel/prmem/prmem_allocator.c | 38 ++++++++++++++++++++++++++++++++++
 kernel/prmem/prmem_init.c      |  1 +
 kernel/prmem/prmem_misc.c      |  3 ++-
 kernel/prmem/prmem_parse.c     | 20 +++++++++++++++++-
 kernel/prmem/prmem_region.c    |  1 +
 kernel/prmem/prmem_reserve.c   |  1 +
 7 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/include/linux/prmem.h b/include/linux/prmem.h
index c7034690f7cb..bb552946cb5b 100644
--- a/include/linux/prmem.h
+++ b/include/linux/prmem.h
@@ -83,6 +83,9 @@ struct prmem_instance {
  * metadata	Physical address of the metadata page.
  * size		Size of initial memory allocated to prmem.
  *
+ * cur_size	Current amount of memory allocated to prmem.
+ * max_size	Maximum amount of memory that can be allocated to prmem.
+ *
  * regions	List of memory regions.
  *
  * instances	Persistent instances.
@@ -95,6 +98,10 @@ struct prmem {
 	unsigned long		metadata;
 	size_t			size;
 
+	/* Dynamic expansion. */
+	size_t			cur_size;
+	size_t			max_size;
+
 	/* Persistent Regions. */
 	struct list_head	regions;
 
@@ -109,6 +116,7 @@ extern struct prmem		*prmem;
 extern unsigned long		prmem_metadata;
 extern unsigned long		prmem_pa;
 extern size_t			prmem_size;
+extern size_t			prmem_max_size;
 extern bool			prmem_inited;
 extern spinlock_t		prmem_lock;
 
diff --git a/kernel/prmem/prmem_allocator.c b/kernel/prmem/prmem_allocator.c
index f12975bc6777..1cb3eae8a3e7 100644
--- a/kernel/prmem/prmem_allocator.c
+++ b/kernel/prmem/prmem_allocator.c
@@ -9,17 +9,55 @@
 
 /* Page Allocation functions. */
 
+static void prmem_expand(void)
+{
+	struct prmem_region	*region;
+	struct page		*pages;
+	unsigned int		order = MAX_ORDER;
+	size_t			size = (1UL << order) << PAGE_SHIFT;
+
+	if (prmem->cur_size + size > prmem->max_size)
+		return;
+
+	spin_unlock(&prmem_lock);
+	pages = alloc_pages(GFP_NOWAIT, order);
+	spin_lock(&prmem_lock);
+
+	if (!pages)
+		return;
+
+	/* cur_size may have changed. Recheck. */
+	if (prmem->cur_size + size > prmem->max_size)
+		goto free;
+
+	region = prmem_add_region(page_to_phys(pages), size);
+	if (!region)
+		goto free;
+
+	pr_warn("%s: prmem expanded by %ld\n", __func__, size);
+	return;
+free:
+	__free_pages(pages, order);
+}
+
 void *prmem_alloc_pages_locked(unsigned int order)
 {
 	struct prmem_region	*region;
 	void			*va;
 	size_t			size = (1UL << order) << PAGE_SHIFT;
+	bool			expand = true;
 
+retry:
 	list_for_each_entry(region, &prmem->regions, node) {
 		va = prmem_alloc_pool(region, size, size);
 		if (va)
 			return va;
 	}
+	if (expand) {
+		expand = false;
+		prmem_expand();
+		goto retry;
+	}
 	return NULL;
 }
 
diff --git a/kernel/prmem/prmem_init.c b/kernel/prmem/prmem_init.c
index 166fca688ab3..f4814cc88508 100644
--- a/kernel/prmem/prmem_init.c
+++ b/kernel/prmem/prmem_init.c
@@ -20,6 +20,7 @@ void __init prmem_init(void)
 		/* Cold boot. */
 		prmem->metadata = prmem_metadata;
 		prmem->size = prmem_size;
+		prmem->max_size = prmem_max_size;
 		INIT_LIST_HEAD(&prmem->regions);
 		INIT_LIST_HEAD(&prmem->instances);
 
diff --git a/kernel/prmem/prmem_misc.c b/kernel/prmem/prmem_misc.c
index 49b6a7232c1a..3100662d2cbe 100644
--- a/kernel/prmem/prmem_misc.c
+++ b/kernel/prmem/prmem_misc.c
@@ -68,7 +68,8 @@ bool __init prmem_validate(void)
 	unsigned long		checksum;
 
 	/* Sanity check the boot parameter. */
-	if (prmem_metadata != prmem->metadata || prmem_size != prmem->size) {
+	if (prmem_metadata != prmem->metadata || prmem_size != prmem->size ||
+	    prmem_max_size != prmem->max_size) {
 		pr_warn("%s: Boot parameter mismatch\n", __func__);
 		return false;
 	}
diff --git a/kernel/prmem/prmem_parse.c b/kernel/prmem/prmem_parse.c
index 6c1a23c6b84e..3a57b37fa191 100644
--- a/kernel/prmem/prmem_parse.c
+++ b/kernel/prmem/prmem_parse.c
@@ -8,9 +8,11 @@
 #include <linux/prmem.h>
 
 /*
- * Syntax: prmem=size[KMG]
+ * Syntax: prmem=size[KMG][,max_size[KMG]]
  *
  *	Specifies the size of the initial memory to be allocated to prmem.
+ *	Optionally, specifies the maximum amount of memory to be allocated to
+ *	prmem. prmem will expand dynamically between size and max_size.
  */
 static int __init prmem_size_parse(char *cmdline)
 {
@@ -28,6 +30,22 @@ static int __init prmem_size_parse(char *cmdline)
 	}
 
 	prmem_size = size;
+	prmem_max_size = size;
+
+	cur = tmp;
+	if (*cur++ == ',') {
+		/* Get max size. */
+		size = memparse(cur, &tmp);
+		if (cur == tmp || !size || size & (PAGE_SIZE - 1) ||
+		    size <= prmem_size) {
+			prmem_size = 0;
+			prmem_max_size = 0;
+			pr_warn("%s: Incorrect max size %lx\n", __func__, size);
+			return -EINVAL;
+		}
+		prmem_max_size = size;
+	}
+
 	return 0;
 }
 early_param("prmem", prmem_size_parse);
diff --git a/kernel/prmem/prmem_region.c b/kernel/prmem/prmem_region.c
index 6dc88c74d9c8..390329a34b74 100644
--- a/kernel/prmem/prmem_region.c
+++ b/kernel/prmem/prmem_region.c
@@ -82,5 +82,6 @@ struct prmem_region *prmem_add_region(unsigned long pa, size_t size)
 		return NULL;
 
 	list_add_tail(&region->node, &prmem->regions);
+	prmem->cur_size += size;
 	return region;
 }
diff --git a/kernel/prmem/prmem_reserve.c b/kernel/prmem/prmem_reserve.c
index 8000fff05402..c5ae5d7d8f0a 100644
--- a/kernel/prmem/prmem_reserve.c
+++ b/kernel/prmem/prmem_reserve.c
@@ -11,6 +11,7 @@ struct prmem		*prmem;
 unsigned long		prmem_metadata;
 unsigned long		prmem_pa;
 unsigned long		prmem_size;
+unsigned long		prmem_max_size;
 
 void __init prmem_reserve_early(void)
 {
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ