lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 22 Apr 2024 22:18:24 -0700
From: Shakeel Butt <shakeel.butt@...ux.dev>
To: Andrew Morton <akpm@...ux-foundation.org>,
	Johannes Weiner <hannes@...xchg.org>,
	Michal Hocko <mhocko@...nel.org>,
	Roman Gushchin <roman.gushchin@...ux.dev>,
	Muchun Song <muchun.song@...ux.dev>
Cc: linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 2/4] memcg: reduce memory for the lruvec and memcg stats

At the moment, the amount of memory allocated for stats related structs
in the mem_cgroup corresponds to the size of enum node_stat_item.
However, not all fields in enum node_stat_item have corresponding memcg
stats. The fields of enum node_stat_item are sorted in such a way that
all the fields with corresponding memcg stats are at the start of the
enum node_stat_item. So, let's just make an explicit boundary within
enum node_stat_item and use that boundary to allocate memory for stats
related structs of memcgs.

For a given x86_64 config, the size of stats with and without patch is:

structs size in bytes         w/o     with

struct lruvec_stats           1128     648
struct lruvec_stats_percpu     752     432
struct memcg_vmstats          1832    1352
struct memcg_vmstats_percpu   1280     960

The memory savings are further compounded by the fact that these structs
are allocated for each CPU and for each node. To be precise, for each memcg,
the memory saved would be:

Memory saved = ((21 * 3 * NR_NODES) + (21 * 2 * NR_NODES * NR_CPUS) +
               (21 * 3) + (21 * 2 * NR_CPUS)) * sizeof(long)

Where 21 is the number of fields eliminated.

Signed-off-by: Shakeel Butt <shakeel.butt@...ux.dev>
---
 include/linux/memcontrol.h | 12 ++++++------
 include/linux/mmzone.h     |  8 ++++++--
 mm/memcontrol.c            |  5 ++++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9aba0d0462ca..d68db7a0e829 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,7 +32,7 @@ struct kmem_cache;
 
 /* Cgroup-specific page state, on top of universal node page state */
 enum memcg_stat_item {
-	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
+	MEMCG_SWAP = NR_VM_NODE_MEMCG_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
 	MEMCG_VMALLOC,
@@ -92,21 +92,21 @@ struct mem_cgroup_reclaim_iter {
 
 struct lruvec_stats_percpu {
 	/* Local (CPU and cgroup) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
+	long state[NR_VM_NODE_MEMCG_STAT_ITEMS];
 
 	/* Delta calculation for lockless upward propagation */
-	long state_prev[NR_VM_NODE_STAT_ITEMS];
+	long state_prev[NR_VM_NODE_MEMCG_STAT_ITEMS];
 };
 
 struct lruvec_stats {
 	/* Aggregated (CPU and subtree) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
+	long state[NR_VM_NODE_MEMCG_STAT_ITEMS];
 
 	/* Non-hierarchical (CPU aggregated) state */
-	long state_local[NR_VM_NODE_STAT_ITEMS];
+	long state_local[NR_VM_NODE_MEMCG_STAT_ITEMS];
 
 	/* Pending child counts during tree propagation */
-	long state_pending[NR_VM_NODE_STAT_ITEMS];
+	long state_pending[NR_VM_NODE_MEMCG_STAT_ITEMS];
 };
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 989ca97402c6..59592f3c7d9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -192,8 +192,12 @@ enum node_stat_item {
 	NR_SHMEM_THPS,
 	NR_FILE_THPS,
 	NR_ANON_THPS,
-	/* No memcg stats for the following fields. */
-	NR_SHMEM_PMDMAPPED,
+	/*
+	 * No memcg stats for the following fields. Please add stats which have
+	 * memcg counterpart above NR_VM_NODE_MEMCG_STAT_ITEMS.
+	 */
+	NR_VM_NODE_MEMCG_STAT_ITEMS,
+	NR_SHMEM_PMDMAPPED = NR_VM_NODE_MEMCG_STAT_ITEMS,
 	NR_FILE_PMDMAPPED,
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
 	NR_VMSCAN_WRITE,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 833d09c1d523..bb1bbf417a46 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1648,6 +1648,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 {
 	int i;
 
+	/* Reduce by 1 for MEMCG_SWAP as that is not exposed in v2. */
+	BUILD_BUG_ON(ARRAY_SIZE(memory_stats) != MEMCG_NR_STAT - 1);
+
 	/*
 	 * Provide statistics on the state of the memory subsystem as
 	 * well as cumulative event counters that show past behavior.
@@ -5869,7 +5872,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
 		lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
 
-		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+		for (i = 0; i < NR_VM_NODE_MEMCG_STAT_ITEMS; i++) {
 			delta = pn->lruvec_stats.state_pending[i];
 			if (delta)
 				pn->lruvec_stats.state_pending[i] = 0;
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ