lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251008115658.719006-1-stdcalllevi@yandex-team.ru>
Date: Wed,  8 Oct 2025 11:56:58 +0000
From: Kirill Martynov <stdcalllevi@...dex-team.ru>
To: hannes@...xchg.org,
	surenb@...gle.com
Cc: peterz@...radead.org,
	mingo@...hat.com,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org,
	dietmar.eggemann@....com,
	linux-kernel@...r.kernel.org,
	Kirill Martynov <stdcalllevi@...dex-team.ru>
Subject: [PATCH] sched/psi: add "abs" pressure type for memory resource

Current PSI memory pressure metrics ("some" and "full") are normalized
against the number of non-idle tasks in the system. This means that
reported stall ratios and trigger behavior depend on overall system load.
When the system is mostly idle, even small stalls can appear as high
pressure, while under heavy load the same stalls may look negligible.

In some use cases this normalization is not helpful. Userspace components
that react early to memory pressure, for example by adjusting memory
reserves or throttling background activity, need a signal that reflects
the actual stall time regardless of task count.

This change introduces a new pressure type for memory called
"abs" (absolute). Unlike "some" and "full", the "abs" calculation
in collect_percpu_times() skips the non-idle weighting when accumulating
deltas, as well as the matching division by the total non-idle time,
providing a load-independent measure of memory stalls.

The new metric allows early detection of memory pressure, which can be
useful for proactive memory management.

Signed-off-by: Kirill Martynov <stdcalllevi@...dex-team.ru>
---
 include/linux/psi_types.h |  1 +
 kernel/sched/psi.c        | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index dd10c22299ab..65acd58f6766 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -61,6 +61,7 @@ enum psi_states {
 	PSI_MEM_FULL,
 	PSI_CPU_SOME,
 	PSI_CPU_FULL,
+	PSI_MEM_ABS,
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 	PSI_IRQ_FULL,
 #endif
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 59fdb7ebbf22..4da28df19bf5 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -252,6 +252,7 @@ static u32 test_states(unsigned int *tasks, u32 state_mask)
 
 	if (tasks[NR_MEMSTALL]) {
 		state_mask |= BIT(PSI_MEM_SOME);
+		state_mask |= BIT(PSI_MEM_ABS);
 		if (tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING])
 			state_mask |= BIT(PSI_MEM_FULL);
 	}
@@ -389,8 +390,12 @@ static void collect_percpu_times(struct psi_group *group,
 		nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
 		nonidle_total += nonidle;
 
-		for (s = 0; s < PSI_NONIDLE; s++)
-			deltas[s] += (u64)times[s] * nonidle;
+		for (s = 0; s < PSI_NONIDLE; s++) {
+			if (s == PSI_MEM_ABS)
+				deltas[s] += (u64)times[s];
+			else
+				deltas[s] += (u64)times[s] * nonidle;
+		}
 	}
 
 	/*
@@ -406,9 +411,13 @@ static void collect_percpu_times(struct psi_group *group,
 	 */
 
 	/* total= */
-	for (s = 0; s < NR_PSI_STATES - 1; s++)
-		group->total[aggregator][s] +=
+	for (s = 0; s < NR_PSI_STATES - 1; s++) {
+		if (s == PSI_MEM_ABS)
+			group->total[aggregator][s] += deltas[s];
+		else
+			group->total[aggregator][s] +=
 				div_u64(deltas[s], max(nonidle_total, 1UL));
+	}
 
 	if (pchanged_states)
 		*pchanged_states = changed_states;
@@ -780,6 +789,10 @@ static void record_times(struct psi_group_cpu *groupc, u64 now)
 			groupc->times[PSI_MEM_FULL] += delta;
 	}
 
+	if (groupc->state_mask & (1 << PSI_MEM_ABS)) {
+		groupc->times[PSI_MEM_ABS] += delta;
+	}
+
 	if (groupc->state_mask & (1 << PSI_CPU_SOME)) {
 		groupc->times[PSI_CPU_SOME] += delta;
 		if (groupc->state_mask & (1 << PSI_CPU_FULL))
@@ -1289,6 +1302,19 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 			   total);
 	}
 
+	if (res == PSI_MEM) {
+		unsigned long *avg = group->avg[PSI_MEM_ABS];
+		u64 total = 0;
+
+		total = div_u64(group->total[PSI_AVGS][PSI_MEM_ABS],
+				NSEC_PER_USEC);
+		seq_printf(m, "abs avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
+			   LOAD_INT(avg[0]), LOAD_FRAC(avg[0]),
+			   LOAD_INT(avg[1]), LOAD_FRAC(avg[1]),
+			   LOAD_INT(avg[2]), LOAD_FRAC(avg[2]),
+			   total);
+	}
+
 	return 0;
 }
 
@@ -1315,6 +1341,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
 		state = PSI_IO_SOME + res * 2;
 	else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
 		state = PSI_IO_FULL + res * 2;
+	else if (res == PSI_MEM && sscanf(buf, "all %u %u", &threshold_us, &window_us) == 2)
+		state = PSI_MEM_ABS;
 	else
 		return ERR_PTR(-EINVAL);
 
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ