lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri,  3 May 2024 13:18:34 -0700
From: Roman Gushchin <roman.gushchin@...ux.dev>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Muchun Song <muchun.song@...ux.dev>,
	Johannes Weiner <hannes@...xchg.org>,
	Michal Hocko <mhocko@...nel.org>,
	Shakeel Butt <shakeel.butt@...ux.dev>,
	Frank van der Linden <fvdl@...gle.com>,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	Roman Gushchin <roman.gushchin@...ux.dev>
Subject: [PATCH v1 3/4] mm: memcg: don't call propagate_protected_usage() needlessly

Memory protection (min/low) requires constant tracking of
the usage of protected memory. propagate_protected_usage() is called
each time a counter's usage changes and does a number of operations
even in cases when the actual memory protection functionality is not
supported (e.g. hugetlb cgroups or memcg swap counters). This is an
obvious inefficiency, which can be addressed by calling
propagate_protected_usage() only for counters that actually support
protection. This eliminates a number of operations from hot paths.

Signed-off-by: Roman Gushchin <roman.gushchin@...ux.dev>
---
 include/linux/page_counter.h |  8 +++++++-
 mm/hugetlb_cgroup.c          |  2 +-
 mm/memcontrol.c              |  4 ++--
 mm/page_counter.c            | 15 ++++++++++++---
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index ae6cc080b78b..5d963f54fcb8 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -62,6 +62,7 @@ struct page_counter {
 	/* Keep all the read most fields in a separete cacheline. */
 	CACHELINE_PADDING(_pad2_);
 
+	bool protection_support;
 	unsigned long min;
 	unsigned long low;
 	unsigned long high[__MCT_NR_ITEMS];
@@ -75,8 +76,12 @@ struct page_counter {
 #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
 #endif
 
+/*
+ * Protection is supported only for the first counter (with id 0).
+ */
 static inline void page_counter_init(struct page_counter *counter,
-				     struct page_counter *parent)
+				     struct page_counter *parent,
+				     bool protection_support)
 {
 	int i;
 
@@ -86,6 +91,7 @@ static inline void page_counter_init(struct page_counter *counter,
 	}
 
 	counter->parent = parent;
+	counter->protection_support = protection_support;
 }
 
 static inline unsigned long page_counter_read(struct page_counter *counter,
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 069c7f164dc5..81cb78d0714f 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -79,7 +79,7 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
 	int idx;
 
 	page_counter_init(&h_cgroup->res,
-			  parent_h_cgroup ? &parent_h_cgroup->res : NULL);
+			  parent_h_cgroup ? &parent_h_cgroup->res : NULL, false);
 
 	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
 		unsigned long limit;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f4511506ef1f..46becae5ff99 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5761,11 +5761,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent) {
 		WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
 		WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
-		page_counter_init(&memcg->memory, &parent->memory);
+		page_counter_init(&memcg->memory, &parent->memory, true);
 	} else {
 		init_memcg_stats();
 		init_memcg_events();
-		page_counter_init(&memcg->memory, NULL);
+		page_counter_init(&memcg->memory, NULL, true);
 		root_mem_cgroup = memcg;
 		return &memcg->css;
 	}
diff --git a/mm/page_counter.c b/mm/page_counter.c
index b6ca3adbc226..5a27e3141ff3 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -60,7 +60,8 @@ void page_counter_cancel(struct page_counter *counter,
 		new = 0;
 		atomic_long_set(&counter->usage[id], new);
 	}
-	propagate_protected_usage(counter, new);
+	if (counter->protection_support && id == 0)
+		propagate_protected_usage(counter, new);
 }
 
 /**
@@ -76,12 +77,14 @@ void page_counter_charge(struct page_counter *counter,
 			 unsigned long nr_pages)
 {
 	struct page_counter *c;
+	bool track_protection = counter->protection_support && (id == 0);
 
 	for (c = counter; c; c = c->parent) {
 		long new;
 
 		new = atomic_long_add_return(nr_pages, &c->usage[id]);
-		propagate_protected_usage(c, new);
+		if (track_protection)
+			propagate_protected_usage(c, new);
 		/*
 		 * This is indeed racy, but we can live with some
 		 * inaccuracy in the watermark.
@@ -107,6 +110,7 @@ bool page_counter_try_charge(struct page_counter *counter,
 			     struct page_counter **fail)
 {
 	struct page_counter *c;
+	bool track_protection = counter->protection_support && (id == 0);
 
 	for (c = counter; c; c = c->parent) {
 		long new;
@@ -136,7 +140,8 @@ bool page_counter_try_charge(struct page_counter *counter,
 			*fail = c;
 			goto failed;
 		}
-		propagate_protected_usage(c, new);
+		if (track_protection)
+			propagate_protected_usage(c, new);
 		/*
 		 * Just like with failcnt, we can live with some
 		 * inaccuracy in the watermark.
@@ -226,6 +231,8 @@ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
 {
 	struct page_counter *c;
 
+	WARN_ON_ONCE(!counter->protection_support);
+
 	WRITE_ONCE(counter->min, nr_pages);
 
 	for (c = counter; c; c = c->parent)
@@ -243,6 +250,8 @@ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
 {
 	struct page_counter *c;
 
+	WARN_ON_ONCE(!counter->protection_support);
+
 	WRITE_ONCE(counter->low, nr_pages);
 
 	for (c = counter; c; c = c->parent)
-- 
2.43.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ