[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220811231643.1012912-13-zi.yan@sent.com>
Date: Thu, 11 Aug 2022 19:16:43 -0400
From: Zi Yan <zi.yan@...t.com>
To: linux-mm@...ck.org
Cc: David Hildenbrand <david@...hat.com>,
Matthew Wilcox <willy@...radead.org>,
Vlastimil Babka <vbabka@...e.cz>,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
Mike Kravetz <mike.kravetz@...cle.com>,
John Hubbard <jhubbard@...dia.com>,
Yang Shi <shy828301@...il.com>,
David Rientjes <rientjes@...gle.com>,
James Houghton <jthoughton@...gle.com>,
Mike Rapoport <rppt@...nel.org>, linux-kernel@...r.kernel.org
Subject: [RFC PATCH v2 12/12] mm: make MAX_ORDER a kernel boot time parameter.
From: Zi Yan <ziy@...dia.com>
With the new buddy_alloc_max_order boot parameter, users can specify a larger
MAX_ORDER than the one set by CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER.
It can be set to any value >= CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER,
but < 256 (limited by vmscan scan_control and per-cpu free page list).
Signed-off-by: Zi Yan <ziy@...dia.com>
Cc: Jonathan Corbet <corbet@....net>
Cc: "Paul E. McKenney" <paulmck@...nel.org>
Cc: Randy Dunlap <rdunlap@...radead.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Vlastimil Babka <vbabka@...e.cz>
Cc: linux-doc@...r.kernel.org
Cc: linux-mm@...ck.org
Cc: linux-kernel@...r.kernel.org
---
.../admin-guide/kernel-parameters.txt | 5 +++
include/linux/mmzone.h | 8 +++++
mm/Kconfig | 13 +++++++
mm/page_alloc.c | 34 ++++++++++++++++++-
mm/vmscan.c | 1 -
5 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ec519225b671..0f71233ae396 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -494,6 +494,11 @@
bttv.pll= See Documentation/admin-guide/media/bttv.rst
bttv.tuner=
+ buddy_alloc_max_order= [KNL] This parameter adjusts the size of the largest
+ pages that can be allocated from the kernel buddy allocator. The largest
+ page size is 2^buddy_alloc_max_order * PAGE_SIZE.
+ Format: integer
+
bulk_remove=off [PPC] This parameter disables the use of the pSeries
firmware feature for flushing multiple hpte entries
at a time.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b5774e4c2700..90121d25d660 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,6 +35,14 @@
#define MIN_MAX_ORDER MAX_ORDER
#endif
+/* remap MAX_ORDER to buddy_alloc_max_order for boot time adjustment */
+#ifdef CONFIG_BOOT_TIME_MAX_ORDER
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+#undef MAX_ORDER
+#define MAX_ORDER buddy_alloc_max_order
+#endif /* CONFIG_BOOT_TIME_MAX_ORDER */
+
#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER)
/*
diff --git a/mm/Kconfig b/mm/Kconfig
index e558f5679707..acccb919d72d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -455,6 +455,19 @@ config SET_MAX_ORDER
increase this value. A value of 10 means that the largest free memory
block is 2^10 pages.
+config BOOT_TIME_MAX_ORDER
+ bool "Set maximum order of buddy allocator at boot time"
+ depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER != 0 || SET_MAX_ORDER != 0)
+ help
+ This option enables users to set the maximum order of the buddy allocator
+ at system boot time instead of using a static macro set at compile time.
+ Systems with a lot of memory might want to allocate large pages, whereas
+ doing so is much less feasible and desirable for systems with less memory.
+ This option allows different systems to control the largest page they want
+ to allocate. When the boot time parameter is not set, MAX_ORDER defaults
+ to ARCH_FORCE_MAX_ORDER or SET_MAX_ORDER, whichever is non-zero.
+ The maximum of MAX_ORDER is currently limited to 256.
+
config HAVE_MEMBLOCK_PHYS_MAP
bool
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 941a94bb8cf0..4c4d68da1922 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1581,7 +1581,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
order = pindex_to_order(pindex);
nr_pages = 1 << order;
- BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
+ BUILD_BUG_ON(MIN_MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
int mt;
@@ -9679,3 +9679,35 @@ bool has_managed_dma(void)
return false;
}
#endif /* CONFIG_ZONE_DMA */
+
+#ifdef CONFIG_BOOT_TIME_MAX_ORDER
+int buddy_alloc_max_order = MIN_MAX_ORDER;
+EXPORT_SYMBOL(buddy_alloc_max_order);
+
+/* Early-param handler: parse buddy_alloc_max_order= and set buddy_alloc_max_order. */
+static int __init buddy_alloc_set(char *val)
+{
+	unsigned long max_order;
+	int ret = kstrtoul(val, 10, &max_order);
+
+	if (ret < 0)
+		return ret;	/* keep kstrtoul()'s own error code */
+
+	/*
+	 * Besides the MIN_MAX_ORDER floor, max_order has two upper limits:
+	 * 1. scan_control in mm/vmscan.c uses an s8 field for order, so max_order
+	 *    cannot be bigger than S8_MAX before the field is changed.
+	 * 2. free_pcppages_bulk() needs max_order < (1<<NR_PCP_ORDER_WIDTH).
+	 * Any value outside that range falls back to MIN_MAX_ORDER.
+	 */
+	if (max_order > MIN_MAX_ORDER && max_order <= S8_MAX &&
+	    max_order < (1<<NR_PCP_ORDER_WIDTH))
+		buddy_alloc_max_order = max_order;
+	else
+		buddy_alloc_max_order = MIN_MAX_ORDER;
+
+	return 0;
+}
+
+early_param("buddy_alloc_max_order", buddy_alloc_set);
+#endif /* CONFIG_BOOT_TIME_MAX_ORDER */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06eeeae038dd..9d4fde8705d9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3816,7 +3816,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
* scan_control uses s8 fields for order, priority, and reclaim_idx.
* Confirm they are large enough for max values.
*/
- BUILD_BUG_ON(MAX_ORDER > S8_MAX);
BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);
--
2.35.1
Powered by blists - more mailing lists