[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20241126095138.1832464-1-claudiu.beznea.uj@bp.renesas.com>
Date: Tue, 26 Nov 2024 11:51:38 +0200
From: Claudiu <claudiu.beznea@...on.dev>
To: corbet@....net,
akpm@...ux-foundation.org,
thuth@...hat.com,
rostedt@...dmis.org,
paulmck@...nel.org,
xiongwei.song@...driver.com,
ying.huang@...el.com
Cc: linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
claudiu.beznea@...on.dev,
geert+renesas@...der.be,
wsa+renesas@...g-engineering.com,
Claudiu Beznea <claudiu.beznea.uj@...renesas.com>
Subject: [RFC PATCH] mm: page_alloc: Add kernel parameter to select maximum PCP batch scale number
From: Claudiu Beznea <claudiu.beznea.uj@...renesas.com>
Commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid
too long latency") introduced default PCP (Per-CPU Pageset) batch size as
a configuration flag. The configuration flag is CONFIG_PCP_BATCH_SCALE_MAX.
The ARM64 defconfig has CONFIG_PCP_BATCH_SCALE_MAX=5. This defconfig
is used by a high range of SoCs.
The Renesas RZ/G3S SoC is a single CPU SoC, with L1$ (I-cache 32Kbytes,
D-cache 32 Kbytes), L3$ (256 Kbytes), but no L2$. It is currently used in
a configuration with 1 GiB RAM size. In this configuration, starting with
commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid too
long latency") the "bonnie++ -d /mnt -u root" benchmark takes ~14 minutes
while previously it took ~10 minutes. The /mnt directory is mounted on SD
card. Same behavior is reproduced on similar Renesas single core devices
(e.g., Renesas RZ/G2UL).
Add a new kernel parameter to allow systems like the Renesas RZ/G3S to
continue to have the same performance numbers with the default mainline
ARM64 config. With pcp_batch_scale_max=5 (the default value) the bonnie++
benchmark takes ~14 minutes, while with pcp_batch_scale_max=0 it takes
~10 minutes.
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@...renesas.com>
---
.../admin-guide/kernel-parameters.txt | 6 +++++
mm/page_alloc.c | 26 ++++++++++++++-----
2 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e7bfe1bde49e..ce745ea78470 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4716,6 +4716,12 @@
for debug and development, but should not be
needed on a platform with proper driver support.
+ pcp_batch_scale_max=n
+ Format: <integer>
+ Range: 0-6
+ Default: CONFIG_PCP_BATCH_SCALE_MAX
+ Set the maximum scale factor for the PCP (Per-CPU
+ Pageset) batch scale algorithm.
+
pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at
boot time.
Format: { 0 | 1 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc55d39eb372..ef1d37cefb43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -163,6 +163,20 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
#define pcp_spin_unlock(ptr) \
pcpu_spin_unlock(lock, ptr)
+/* Runtime override for CONFIG_PCP_BATCH_SCALE_MAX (see pcp_batch_scale_max=). */
+static unsigned int pcp_batch_scale_max = CONFIG_PCP_BATCH_SCALE_MAX;
+/* Upper bound of the valid scale range, matching the Kconfig range 0..6. */
+#define MAX_PCP_BATCH 6
+
+/*
+ * Parse the "pcp_batch_scale_max=" kernel parameter.
+ *
+ * get_option() takes an int *, so parse into a signed temporary: this
+ * avoids the type-mismatched cast and lets negative input be detected
+ * instead of wrapping to a huge unsigned value. Out-of-range values are
+ * clamped to the supported 0..MAX_PCP_BATCH range; an unparsable value
+ * leaves the Kconfig default untouched.
+ */
+static int __init setup_pcp_batch_scale_max(char *str)
+{
+	int val;
+
+	/* get_option() returns 1 when a plain integer was parsed. */
+	if (get_option(&str, &val) != 1)
+		return 0;
+
+	if (val < 0)
+		val = 0;
+	else if (val > MAX_PCP_BATCH)
+		val = MAX_PCP_BATCH;
+
+	pcp_batch_scale_max = val;
+
+	return 1;
+}
+__setup("pcp_batch_scale_max=", setup_pcp_batch_scale_max);
+
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -2362,7 +2376,7 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
* control latency. This caps pcp->high decrement too.
*/
if (pcp->high > high_min) {
- pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+ pcp->high = max3(pcp->count - (batch << pcp_batch_scale_max),
pcp->high - (pcp->high >> 3), high_min);
if (pcp->high > high_min)
todo++;
@@ -2412,7 +2426,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
count = pcp->count;
if (count) {
int to_drain = min(count,
- pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+ pcp->batch << pcp_batch_scale_max);
free_pcppages_bulk(zone, to_drain, pcp, 0);
count -= to_drain;
@@ -2540,7 +2554,7 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free
/* Free as much as possible if batch freeing high-order pages. */
if (unlikely(free_high))
- return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX);
+ return min(pcp->count, batch << pcp_batch_scale_max);
/* Check for PCP disabled or boot pageset */
if (unlikely(high < batch))
@@ -2572,7 +2586,7 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
return 0;
if (unlikely(free_high)) {
- pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+ pcp->high = max(high - (batch << pcp_batch_scale_max),
high_min);
return 0;
}
@@ -2642,7 +2656,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
}
- if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
+ if (pcp->free_count < (batch << pcp_batch_scale_max))
pcp->free_count += (1 << order);
high = nr_pcp_high(pcp, zone, batch, free_high);
if (pcp->count >= high) {
@@ -2984,7 +2998,7 @@ static int nr_pcp_alloc(struct per_cpu_pages *pcp, struct zone *zone, int order)
* subsequent allocation of order-0 pages without any freeing.
*/
if (batch <= max_nr_alloc &&
- pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX)
+ pcp->alloc_factor < pcp_batch_scale_max)
pcp->alloc_factor++;
batch = min(batch, max_nr_alloc);
}
--
2.39.2
Powered by blists - more mailing lists