lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250319064148.774406-5-jingxiangzeng.cas@gmail.com>
Date: Wed, 19 Mar 2025 14:41:47 +0800
From: Jingxiang Zeng <jingxiangzeng.cas@...il.com>
To: akpm@...ux-foundation.org
Cc: linux-mm@...ck.org,
	cgroups@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	hannes@...xchg.org,
	mhocko@...nel.org,
	roman.gushchin@...ux.dev,
	shakeel.butt@...ux.dev,
	muchun.song@...ux.dev,
	kasong@...cent.com,
	Zeng Jingxiang <linuszeng@...cent.com>
Subject: [RFC 4/5] mm/memcontrol: allow memsw account in cgroup v2

From: Zeng Jingxiang <linuszeng@...cent.com>

memsw accounting is a very useful knob for container memory
overcommitting: it is a great abstraction of the "expected total
memory usage" of a container, so containers can't allocate too
much memory using SWAP, but are still able to SWAP out.

As a simple example, with memsw.limit == memory.limit, containers
can't exceed their original memory limit even with SWAP enabled: they
get OOM killed as they used to, but the host is now able to
offload cold pages.

A similar ability seems absent in V2: with memory.swap.max == 0, the
host can't use SWAP to reclaim container memory at all. But with a
value larger than that, containers are able to overuse memory, causing
delayed OOM kills and thrashing, and the CPU/memory usage ratio can
become heavily out of balance, especially with compressed SWAP backends.

This patch makes the semantics of memory.swap.max consistent
with memory.memsw.limit_in_bytes, and the semantics of
memory.swap.current consistent with memory.memsw.usage_in_bytes,
when the MEMSW_ACCOUNT_ON_DFL config option or the
cgroup.memsw_account_on_dfl startup parameter is enabled.

Signed-off-by: Zeng Jingxiang <linuszeng@...cent.com>
---
 mm/memcontrol-v1.c |  2 +-
 mm/memcontrol-v1.h |  4 +++-
 mm/memcontrol.c    | 29 +++++++++++++++++++----------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index c1feb3945350..3344d5e25822 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -1436,7 +1436,7 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked)
 
 static DEFINE_MUTEX(memcg_max_mutex);
 
-static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
+int mem_cgroup_resize_max(struct mem_cgroup *memcg,
 				 unsigned long max, bool memsw)
 {
 	bool enlarge = false;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..7f7ef9f6d03e 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -36,10 +36,12 @@ struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
 /* Cgroup v1-specific declarations */
 #ifdef CONFIG_MEMCG_V1
 
+int mem_cgroup_resize_max(struct mem_cgroup *memcg,
+				 unsigned long max, bool memsw);
 /* Whether legacy memory+swap accounting is active */
 static inline bool do_memsw_account(void)
 {
-	return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
+	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) || do_memsw_account_on_dfl();
 }
 
 unsigned long memcg_events_local(struct mem_cgroup *memcg, int event);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 623ebf610946..d85699fa8a90 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5205,9 +5205,12 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
 	if (err)
 		return err;
 
-	xchg(&memcg->swap.max, max);
+	if (do_memsw_account_on_dfl())
+		err = mem_cgroup_resize_max(memcg, max, true);
+	else
+		xchg(&memcg->swap.max, max);
 
-	return nbytes;
+	return err ?: nbytes;
 }
 
 static int swap_events_show(struct seq_file *m, void *v)
@@ -5224,24 +5227,28 @@ static int swap_events_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct cftype swap_files[] = {
+static struct cftype swap_files_v1[] = {
 	{
 		.name = "swap.current",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.read_u64 = swap_current_read,
 	},
-	{
-		.name = "swap.high",
-		.flags = CFTYPE_NOT_ON_ROOT,
-		.seq_show = swap_high_show,
-		.write = swap_high_write,
-	},
 	{
 		.name = "swap.max",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = swap_max_show,
 		.write = swap_max_write,
 	},
+	{ }	/* terminate */
+};
+
+static struct cftype swap_files[] = {
+	{
+		.name = "swap.high",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = swap_high_show,
+		.write = swap_high_write,
+	},
 	{
 		.name = "swap.max.effective",
 		.flags = CFTYPE_NOT_ON_ROOT,
@@ -5473,7 +5480,9 @@ static int __init mem_cgroup_swap_init(void)
 	if (mem_cgroup_disabled())
 		return 0;
 
-	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files));
+	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files_v1));
+	if (!do_memsw_account_on_dfl())
+		WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files));
 #ifdef CONFIG_MEMCG_V1
 	WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_files));
 #endif
-- 
2.41.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ