Message-ID: <20250915195153.462039-2-fvdl@google.com>
Date: Mon, 15 Sep 2025 19:51:42 +0000
From: Frank van der Linden <fvdl@...gle.com>
To: akpm@...ux-foundation.org, muchun.song@...ux.dev, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org
Cc: hannes@...xchg.org, david@...hat.com, roman.gushchin@...ux.dev, 
	Frank van der Linden <fvdl@...gle.com>
Subject: [RFC PATCH 01/12] mm/cma: add tunable for CMA fallback limit

Add a tunable to experiment with the circumstances under which
movable allocations should use CMA pageblocks first, to avoid
false OOM conditions.

The limit is the percentage of a zone's free memory that consists
of free CMA pages. If the fraction of free memory in CMA pageblocks
is above this limit, CMA will be used first. So, 0 means always
use CMA first, and 100 means never use CMA first.
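
For example, with cma_first_limit at 50, a zone that has 1000 free
pages, 600 of which are free CMA pages (60% > 50%), will have
movable allocations try CMA pageblocks first.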

Currently the default is 50, which matches the existing behavior,
so there is no functional change.
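
The limit can be changed at runtime through the new sysctl. Since
it is added to page_alloc_sysctl_table, it should show up under
/proc/sys/vm/ together with the other page_alloc sysctls, e.g.:

	echo 25 > /proc/sys/vm/cma_first_limit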

Signed-off-by: Frank van der Linden <fvdl@...gle.com>
---
 include/linux/mm.h |  4 +++
 mm/page_alloc.c    | 84 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ae97a0b8ec7..313ab38dc398 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3253,6 +3253,10 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
 
 extern void setup_per_cpu_pageset(void);
 
+#ifdef CONFIG_CMA
+extern int cma_first_limit;
+#endif
+
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1d037f97c5f..d3966d31c039 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2446,6 +2446,54 @@ enum rmqueue_mode {
 	RMQUEUE_STEAL,
 };
 
+#ifdef CONFIG_CMA
+/*
+ * The percentage of a zone's free pages that consists of free CMA
+ * pages, above which CMA is used first.
+ * 0 = always, 100 = never
+ */
+int cma_first_limit __read_mostly = 50;
+EXPORT_SYMBOL_GPL(cma_first_limit);
+
+/*
+ * Return values:
+ *
+ * -1 - never try CMA (!ALLOC_CMA, !IS_ENABLED(CONFIG_CMA), or no free CMA)
+ *  0 - don't try CMA first
+ *  1 - try CMA first
+ */
+static __always_inline int use_cma_first(struct zone *zone,
+					 unsigned int alloc_flags)
+{
+	unsigned long free_cma, free_pages, cma_percentage;
+
+	if (!(alloc_flags & ALLOC_CMA))
+		return -1;
+
+	free_cma = zone_page_state(zone, NR_FREE_CMA_PAGES);
+	if (!free_cma)
+		return -1;
+
+	if (!cma_first_limit)
+		return 1;
+
+	if (cma_first_limit == 100)
+		return 0;
+
+	free_pages = zone_page_state(zone, NR_FREE_PAGES);
+	if (!free_pages)
+		return 0;
+
+	cma_percentage = (free_cma * 100) / free_pages;
+	return (cma_percentage > cma_first_limit) ? 1 : 0;
+}
+#else
+static inline int use_cma_first(struct zone *zone, unsigned int alloc_flags)
+{
+	return -1;
+}
+#endif
+
 /*
  * Do the hard work of removing an element from the buddy allocator.
  * Call me with the zone->lock already held.
@@ -2455,20 +2503,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 	  unsigned int alloc_flags, enum rmqueue_mode *mode)
 {
 	struct page *page;
+	int cma_first;
 
-	if (IS_ENABLED(CONFIG_CMA)) {
-		/*
-		 * Balance movable allocations between regular and CMA areas by
-		 * allocating from CMA when over half of the zone's free memory
-		 * is in the CMA area.
-		 */
-		if (alloc_flags & ALLOC_CMA &&
-		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
-		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
-			page = __rmqueue_cma_fallback(zone, order);
-			if (page)
-				return page;
-		}
+	cma_first = use_cma_first(zone, alloc_flags);
+	if (cma_first > 0) {
+		page = __rmqueue_cma_fallback(zone, order);
+		if (page)
+			return page;
 	}
 
 	/*
@@ -2487,7 +2528,11 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 			return page;
 		fallthrough;
 	case RMQUEUE_CMA:
-		if (alloc_flags & ALLOC_CMA) {
+		/*
+		 * Try CMA if we should, and haven't done so yet,
+		 * which is indicated by cma_first == 0.
+		 */
+		if (cma_first == 0) {
 			page = __rmqueue_cma_fallback(zone, order);
 			if (page) {
 				*mode = RMQUEUE_CMA;
@@ -6672,6 +6717,17 @@ static const struct ctl_table page_alloc_sysctl_table[] = {
 		.extra2		= SYSCTL_ONE_HUNDRED,
 	},
 #endif
+#ifdef CONFIG_CMA
+	{
+		.procname	= "cma_first_limit",
+		.data		= &cma_first_limit,
+		.maxlen		= sizeof(cma_first_limit),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE_HUNDRED,
+	},
+#endif
 };
 
 void __init page_alloc_sysctl_init(void)
-- 
2.51.0.384.g4c02a37b29-goog

