linux-kernel - [PATCH v1 10/12] mm: convert MAX_ORDER sized static arrays to dynamic ones.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220922011252.2266780-11-zi.yan@sent.com>
Date:   Wed, 21 Sep 2022 21:12:50 -0400
From:   Zi Yan <zi.yan@...t.com>
To:     linux-mm@...ck.org
Cc:     Zi Yan <ziy@...dia.com>, David Hildenbrand <david@...hat.com>,
        Matthew Wilcox <willy@...radead.org>,
        Vlastimil Babka <vbabka@...e.cz>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        Mike Kravetz <mike.kravetz@...cle.com>,
        John Hubbard <jhubbard@...dia.com>,
        Yang Shi <shy828301@...il.com>,
        David Rientjes <rientjes@...gle.com>,
        James Houghton <jthoughton@...gle.com>,
        Mike Rapoport <rppt@...nel.org>,
        Muchun Song <songmuchun@...edance.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        linux-kernel@...r.kernel.org
Subject: [PATCH v1 10/12] mm: convert MAX_ORDER sized static arrays to dynamic ones.

From: Zi Yan <ziy@...dia.com>

This prepares for the upcoming changes to make MAX_ORDER a boot time
parameter instead of compilation time constant. All static arrays with
MAX_ORDER size are converted to pointers and their memory is allocated
at runtime.

free_area array in struct zone is allocated using memblock_alloc_node()
at boot time and using kzalloc() when memory is hot-added.

Signed-off-by: Zi Yan <ziy@...dia.com>
Cc: Dave Young <dyoung@...hat.com>
Cc: Jonathan Corbet <corbet@....net>
Cc: Christian Koenig <christian.koenig@....com>
Cc: David Airlie <airlied@...ux.ie>
Cc: kexec@...ts.infradead.org
Cc: linux-doc@...r.kernel.org
Cc: dri-devel@...ts.freedesktop.org
Cc: linux-mm@...ck.org
Cc: linux-kernel@...r.kernel.org
---
 .../admin-guide/kdump/vmcoreinfo.rst          |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c              |  7 ++-
 drivers/gpu/drm/ttm/ttm_pool.c                | 58 +++++++++++++++++--
 include/drm/ttm/ttm_pool.h                    |  4 +-
 include/linux/mmzone.h                        |  2 +-
 mm/kmsan/init.c                               | 12 +++-
 mm/page_alloc.c                               | 32 ++++++++--
 7 files changed, 98 insertions(+), 19 deletions(-)

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index c572b5230fe0..a775462aa7c7 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -172,7 +172,7 @@ variables.
 Offset of the free_list's member. This value is used to compute the number
 of free pages.
 
-Each zone has a free_area structure array called free_area[MAX_ORDER + 1].
+Each zone has a free_area structure array called free_area with length of MAX_ORDER + 1.
 The free_list represents a linked list of free page blocks.
 
 (list_head, next|prev)
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index e7147e304637..442a77bb5b4f 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -92,7 +92,9 @@ static int ttm_global_init(void)
 		>> PAGE_SHIFT;
 	num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT));
 
-	ttm_pool_mgr_init(num_pages);
+	ret = ttm_pool_mgr_init(num_pages);
+	if (ret)
+		goto out;
 	ttm_tt_mgr_init(num_pages, num_dma32);
 
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
@@ -218,7 +220,8 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
-	ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
+	if (ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32))
+		return -ENOMEM;
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 85d19f425af6..d76f7d476421 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -64,11 +64,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type *global_write_combined;
+static struct ttm_pool_type *global_uncached;
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type *global_dma32_write_combined;
+static struct ttm_pool_type *global_dma32_uncached;
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -493,8 +493,10 @@ EXPORT_SYMBOL(ttm_pool_free);
  * @use_dma32: true if GFP_DMA32 should be used
  *
  * Initialize the pool and its pool types.
+ *
+ * Returns: 0 on successe, negative error code otherwise
  */
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+int ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 		   bool use_dma_alloc, bool use_dma32)
 {
 	unsigned int i, j;
@@ -506,11 +508,30 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 	pool->use_dma32 = use_dma32;
 
 	if (use_dma_alloc) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+			pool->caching[i].orders =
+				kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+					GFP_KERNEL);
+			if (!pool->caching[i].orders) {
+				i--;
+				goto failed;
+			}
 			for (j = 0; j <= MAX_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
+
+		}
+		return 0;
+
+failed:
+		for (; i >= 0; i--) {
+			for (j = 0; j <= MAX_ORDER; ++j)
+				ttm_pool_type_fini(&pool->caching[i].orders[j]);
+			kfree(pool->caching[i].orders);
+		}
+		return -ENOMEM;
 	}
+	return 0;
 }
 
 /**
@@ -701,6 +722,31 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
+	if (!global_write_combined) {
+		global_write_combined = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						GFP_KERNEL);
+		if (!global_write_combined)
+			return -ENOMEM;
+	}
+	if (!global_uncached) {
+		global_uncached = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+					  GFP_KERNEL);
+		if (!global_uncached)
+			return -ENOMEM;
+	}
+	if (!global_dma32_write_combined) {
+		global_dma32_write_combined = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						      GFP_KERNEL);
+		if (!global_dma32_write_combined)
+			return -ENOMEM;
+	}
+	if (!global_dma32_uncached) {
+		global_dma32_uncached = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						GFP_KERNEL);
+		if (!global_dma32_uncached)
+			return -ENOMEM;
+	}
+
 	for (i = 0; i <= MAX_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 8ce14f9d202a..f5ce60f629ae 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -72,7 +72,7 @@ struct ttm_pool {
 	bool use_dma32;
 
 	struct {
-		struct ttm_pool_type orders[MAX_ORDER + 1];
+		struct ttm_pool_type *orders;
 	} caching[TTM_NUM_CACHING_TYPES];
 };
 
@@ -80,7 +80,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		   struct ttm_operation_ctx *ctx);
 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
 
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+int ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 		   bool use_dma_alloc, bool use_dma32);
 void ttm_pool_fini(struct ttm_pool *pool);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index da5745fa15c3..032d347f36dd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -822,7 +822,7 @@ struct zone {
 	CACHELINE_PADDING(_pad1_);
 
 	/* free areas of different sizes */
-	struct free_area	free_area[MAX_ORDER + 1];
+	struct free_area	*free_area;
 
 	/* zone flags, see below */
 	unsigned long		flags;
diff --git a/mm/kmsan/init.c b/mm/kmsan/init.c
index 6c0834274316..d3777146e7c4 100644
--- a/mm/kmsan/init.c
+++ b/mm/kmsan/init.c
@@ -96,7 +96,7 @@ void __init kmsan_init_shadow(void)
 struct metadata_page_pair {
 	struct page *shadow, *origin;
 };
-static struct metadata_page_pair held_back[MAX_ORDER + 1] __initdata;
+static struct metadata_page_pair *held_back __initdata;
 
 /*
  * Eager metadata allocation. When the memblock allocator is freeing pages to
@@ -115,6 +115,16 @@ bool kmsan_memblock_free_pages(struct page *page, unsigned int order)
 {
 	struct page *shadow, *origin;
 
+	if (!held_back) {
+		held_back = memblock_alloc((MAX_ORDER + 1) * sizeof(struct metadata_page_pair),
+				sizeof(struct metadata_page_pair));
+		/* held_back cannot be allocated, kmsan will not take the page */
+		if (!held_back) {
+			WARN_ONCE(1, "held_back array cannot be allocated, kmsan will not work");
+			return true;
+		}
+	}
+
 	if (!held_back[order].shadow) {
 		held_back[order].shadow = page;
 		return false;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e3af87d89ebf..ba7c284ba3d3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6249,13 +6249,23 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
 
 	for_each_populated_zone(zone) {
 		unsigned int order;
-		unsigned long nr[MAX_ORDER + 1], flags, total = 0;
-		unsigned char types[MAX_ORDER + 1];
+		unsigned long *nr, flags, total = 0;
+		unsigned char *types;
 
 		if (zone_idx(zone) > max_zone_idx)
 			continue;
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
+
+		nr = kmalloc_array(MAX_ORDER + 1, sizeof(unsigned long), GFP_KERNEL);
+		if (!nr)
+			break;
+		types = kmalloc_array(MAX_ORDER + 1, sizeof(unsigned char), GFP_KERNEL);
+		if (!types) {
+			kfree(nr);
+			break;
+		}
+
 		show_node(zone);
 		printk(KERN_CONT "%s: ", zone->name);
 
@@ -7710,8 +7720,8 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	lruvec_init(&pgdat->__lruvec);
 }
 
-static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
-							unsigned long remaining_pages)
+static void __init zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+					unsigned long remaining_pages, bool hotplug)
 {
 	atomic_long_set(&zone->managed_pages, remaining_pages);
 	zone_set_nid(zone, nid);
@@ -7720,6 +7730,16 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
 	spin_lock_init(&zone->lock);
 	zone_seqlock_init(zone);
 	zone_pcp_init(zone);
+	if (hotplug)
+		zone->free_area =
+			kcalloc_node(MAX_ORDER + 1, sizeof(struct free_area),
+				     GFP_KERNEL, nid);
+	else
+		zone->free_area =
+			memblock_alloc_node(sizeof(struct free_area) * (MAX_ORDER + 1),
+					    sizeof(struct free_area), nid);
+	BUG_ON(!zone->free_area);
+
 }
 
 /*
@@ -7758,7 +7778,7 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
 	}
 
 	for (z = 0; z < MAX_NR_ZONES; z++)
-		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0, true);
 }
 #endif
 
@@ -7821,7 +7841,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone_init_internals(zone, j, nid, freesize);
+		zone_init_internals(zone, j, nid, freesize, false);
 
 		if (!size)
 			continue;
-- 
2.35.1