lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <156336655979.2828.15196553724473875230.stgit@buzz>
Date:   Wed, 17 Jul 2019 15:29:19 +0300
From:   Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
To:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        Johannes Weiner <hannes@...xchg.org>
Cc:     Michal Hocko <mhocko@...nel.org>
Subject: [PATCH 2/2] mm/memcontrol: split local and nested atomic
 vmstats/vmevents counters

This is alternative solution for problem addressed in commit 815744d75152
("mm: memcontrol: don't batch updates of local VM stats and events").

Instead of adding second set of percpu counters which wastes memory and
slows down showing statistics in cgroup-v1 this patch use two arrays of
atomic counters: local and nested statistics.

Then update has the same amount of atomic operations: local update and
one nested for each parent cgroup. Readers of hierarchical statistics
have to sum two atomics which isn't a big deal.

All updates are still batched using one set of percpu counters.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
---
 include/linux/memcontrol.h |   19 +++++++----------
 mm/memcontrol.c            |   48 +++++++++++++++++++-------------------------
 2 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..4dd75d50c200 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -269,16 +269,16 @@ struct mem_cgroup {
 	atomic_t		moving_account;
 	struct task_struct	*move_lock_task;
 
-	/* Legacy local VM stats and events */
-	struct memcg_vmstats_percpu __percpu *vmstats_local;
-
 	/* Subtree VM stats and events (batched updates) */
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
 	MEMCG_PADDING(_pad2_);
 
-	atomic_long_t		vmstats[MEMCG_NR_STAT];
-	atomic_long_t		vmevents[NR_VM_EVENT_ITEMS];
+	atomic_long_t		vmstats_local[MEMCG_NR_STAT];
+	atomic_long_t		vmstats_nested[MEMCG_NR_STAT];
+
+	atomic_long_t		vmevents_local[NR_VM_EVENT_ITEMS];
+	atomic_long_t		vmevents_nested[NR_VM_EVENT_ITEMS];
 
 	/* memory.events */
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -557,7 +557,8 @@ void unlock_page_memcg(struct page *page);
  */
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
-	long x = atomic_long_read(&memcg->vmstats[idx]);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+		 atomic_long_read(&memcg->vmstats_nested[idx]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -572,11 +573,7 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
 						   int idx)
 {
-	long x = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 06d33dfc4ec4..97debc8e4120 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -695,14 +695,13 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->stat[idx], val);
-
 	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-			atomic_long_add(x, &mi->vmstats[idx]);
+		atomic_long_add(x, &memcg->vmstats_local[idx]);
+		for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+			atomic_long_add(x, &mi->vmstats_nested[idx]);
 		x = 0;
 	}
 	__this_cpu_write(memcg->vmstats_percpu->stat[idx], x);
@@ -777,14 +776,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->events[idx], count);
-
 	x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
 	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-			atomic_long_add(x, &mi->vmevents[idx]);
+		atomic_long_add(x, &memcg->vmevents_local[idx]);
+		for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+			atomic_long_add(x, &mi->vmevents_nested[idx]);
 		x = 0;
 	}
 	__this_cpu_write(memcg->vmstats_percpu->events[idx], x);
@@ -792,17 +790,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
-	return atomic_long_read(&memcg->vmevents[event]);
+	return atomic_long_read(&memcg->vmevents_local[event]) +
+	       atomic_long_read(&memcg->vmevents_nested[event]);
 }
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
-	long x = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_local->events[event], cpu);
-	return x;
+	return atomic_long_read(&memcg->vmevents_local[event]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -2257,9 +2251,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
-			if (x)
-				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-					atomic_long_add(x, &mi->vmstats[i]);
+			if (x) {
+				atomic_long_add(x, &memcg->vmstats_local[i]);
+				for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+					atomic_long_add(x, &mi->vmstats_nested[i]);
+			}
 
 			if (i >= NR_VM_NODE_STAT_ITEMS)
 				continue;
@@ -2280,9 +2276,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
-			if (x)
-				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-					atomic_long_add(x, &mi->vmevents[i]);
+			if (x) {
+				atomic_long_add(x, &memcg->vmevents_local[i]);
+				for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+					atomic_long_add(x, &mi->vmevents_nested[i]);
+			}
 		}
 	}
 
@@ -4085,7 +4083,8 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
  */
 static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
 {
-	long x = atomic_long_read(&memcg->vmstats[idx]);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+		 atomic_long_read(&memcg->vmstats_nested[idx]);
 	int cpu;
 
 	for_each_online_cpu(cpu)
@@ -4638,7 +4637,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);
-	free_percpu(memcg->vmstats_local);
 	kfree(memcg);
 }
 
@@ -4667,10 +4665,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
-	memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
-	if (!memcg->vmstats_local)
-		goto fail;
-
 	memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
 	if (!memcg->vmstats_percpu)
 		goto fail;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ