lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250721203624.3807041-12-kuniyu@google.com>
Date: Mon, 21 Jul 2025 20:35:30 +0000
From: Kuniyuki Iwashima <kuniyu@...gle.com>
To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Neal Cardwell <ncardwell@...gle.com>, Paolo Abeni <pabeni@...hat.com>, 
	Willem de Bruijn <willemb@...gle.com>, Matthieu Baerts <matttbe@...nel.org>, 
	Mat Martineau <martineau@...nel.org>, Johannes Weiner <hannes@...xchg.org>, 
	Michal Hocko <mhocko@...nel.org>, Roman Gushchin <roman.gushchin@...ux.dev>, 
	Shakeel Butt <shakeel.butt@...ux.dev>, Andrew Morton <akpm@...ux-foundation.org>
Cc: Simon Horman <horms@...nel.org>, Geliang Tang <geliang@...nel.org>, 
	Muchun Song <muchun.song@...ux.dev>, Kuniyuki Iwashima <kuniyu@...gle.com>, 
	Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org, mptcp@...ts.linux.dev, 
	cgroups@...r.kernel.org, linux-mm@...ck.org
Subject: [PATCH v1 net-next 11/13] net-memcg: Add memory.socket_isolated knob.

Some networking protocols have their own global memory accounting,
and such memory is also charged to memcg as sock in memory.stat.

Such sockets are subject to the global limit, thus affected by a
noisy neighbour outside the cgroup.

We will decouple the global memory accounting if configured.

Let's add a per-memcg knob to control that.

The value will be saved in each socket when created and will
persist through the socket's lifetime.

Signed-off-by: Kuniyuki Iwashima <kuniyu@...gle.com>
---
 Documentation/admin-guide/cgroup-v2.rst | 16 +++++++++++
 include/linux/memcontrol.h              |  6 ++++
 include/net/sock.h                      |  3 ++
 mm/memcontrol.c                         | 37 +++++++++++++++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bd98ea3175ec1..2428707b7d27d 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1878,6 +1878,22 @@ The following nested keys are defined.
 	Shows pressure stall information for memory. See
 	:ref:`Documentation/accounting/psi.rst <psi>` for details.
 
+  memory.socket_isolated
+	A read-write single value file which exists on non-root cgroups.
+	The default value is "0".
+
+	Some networking protocols (e.g., TCP, UDP) implement their own memory
+	accounting for socket buffers.
+
+	This memory is also charged to a non-root cgroup as sock in memory.stat.
+
+	Since per-protocol limits such as /proc/sys/net/ipv4/tcp_mem and
+	/proc/sys/net/ipv4/udp_mem are global, memory allocation for socket
+	buffers may fail even when the cgroup has available memory.
+
+	Sockets created while this is set to 1 are exempt from these global
+	protocol limits; the value is saved per socket when it is created.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 211712ec57d1a..7d5d43e3b49e6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -226,6 +226,12 @@ struct mem_cgroup {
 	 */
 	bool oom_group;
 
+	/*
+	 * If set, MEMCG_SOCK memory is charged to memcg only; otherwise
+	 * it is charged to both memcg and sk->sk_prot->memory_allocated.
+	 */
+	bool socket_isolated;
+
 	int swappiness;
 
 	/* memory.events and memory.events.local */
diff --git a/include/net/sock.h b/include/net/sock.h
index 16fe0e5afc587..5e8c73731531c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2597,6 +2597,9 @@ static inline gfp_t gfp_memcg_charge(void)
 }
 
 #ifdef CONFIG_MEMCG
+/* The memory.socket_isolated value that turns socket isolation on. */
+#define MEMCG_SOCK_ISOLATED	1UL
+
 static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
 {
 	return sk->sk_memcg;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7f4e31f4e625..0a55c12a6679b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4645,6 +4645,37 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 	return nbytes;
 }
 
+/* Show handler for memory.socket_isolated: prints the memcg's flag. */
+static int memory_socket_isolated_show(struct seq_file *m, void *v)
+{
+	bool isolated = READ_ONCE(mem_cgroup_from_seq(m)->socket_isolated);
+
+	seq_printf(m, "%d\n", isolated);
+	return 0;
+}
+
+/*
+ * Write handler for memory.socket_isolated; only 0 and 1 are accepted.
+ * Returns nbytes on success, the kstrtoint() error or -EINVAL otherwise.
+ */
+static ssize_t memory_socket_isolated_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, socket_isolated;
+
+	/* strstrip() never returns NULL, so no NULL check is needed. */
+	ret = kstrtoint(strstrip(buf), 0, &socket_isolated);
+	if (ret)
+		return ret;
+
+	if (socket_isolated != 0 && socket_isolated != MEMCG_SOCK_ISOLATED)
+		return -EINVAL;
+
+	WRITE_ONCE(memcg->socket_isolated, socket_isolated);
+	return nbytes;
+}
+
 static struct cftype memory_files[] = {
 	{
 		.name = "current",
@@ -4716,6 +4747,12 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NS_DELEGATABLE,
 		.write = memory_reclaim,
 	},
+	{
+		.name = "socket_isolated",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_socket_isolated_show,
+		.write = memory_socket_isolated_write,
+	},
 	{ }	/* terminate */
 };
 
-- 
2.50.0.727.gbf7dc18ff4-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ