lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 27 Aug 2020 10:52:14 -0700
From:   Roman Gushchin <guro@...com>
To:     <linux-mm@...ck.org>
CC:     Andrew Morton <akpm@...ux-foundation.org>,
        =Shakeel Butt <shakeelb@...gle.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...nel.org>, <kernel-team@...com>,
        <linux-kernel@...r.kernel.org>, Roman Gushchin <guro@...com>
Subject: [PATCH RFC 3/4] mm: kmem: prepare remote memcg charging infra for interrupt contexts

Remote memcg charging API uses current->active_memcg to store the
currently active memory cgroup, which overwrites the memory cgroup
of the current process. It works well for normal contexts, but doesn't
work for interrupt contexts: indeed, if an interrupt occurs during
the execution of a section with an active memcg set, all allocations
inside the interrupt will be charged to the active memcg set (given
that we'll enable accounting for allocations from an interrupt
context). But because the interrupt might have no relation to the
active memcg set outside, it's obviously wrong from the accounting
prospective.

To resolve this problem, let's add a global percpu int_active_memcg
variable, which will be used to store an active memory cgroup which
will be sued from interrupt contexts. set_active_memcg() will
transparently use current->active_memcg or int_active_memcg depending
on the context.

To make the read part simple and transparent for the caller, let's
introduce two new functions:
  - struct mem_cgroup *active_memcg(void),
  - struct mem_cgroup *get_active_memcg(void).

They are returning the active memcg if it's set, hiding all
implementation details: where to get it depending on the current context.

Signed-off-by: Roman Gushchin <guro@...com>
---
 include/linux/sched/mm.h | 13 +++++++++--
 mm/memcontrol.c          | 48 ++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 4c69a4349ac1..030a1cf77b8a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -304,6 +304,7 @@ static inline void memalloc_nocma_restore(unsigned int flags)
 #endif
 
 #ifdef CONFIG_MEMCG
+DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 /**
  * set_active_memcg - Starts the remote memcg charging scope.
  * @memcg: memcg to charge.
@@ -318,8 +319,16 @@ static inline void memalloc_nocma_restore(unsigned int flags)
 static inline struct mem_cgroup *
 set_active_memcg(struct mem_cgroup *memcg)
 {
-	struct mem_cgroup *old = current->active_memcg;
-	current->active_memcg = memcg;
+	struct mem_cgroup *old;
+
+	if (in_interrupt()) {
+		old = this_cpu_read(int_active_memcg);
+		this_cpu_write(int_active_memcg, memcg);
+	} else {
+		old = current->active_memcg;
+		current->active_memcg = memcg;
+	}
+
 	return old;
 }
 #else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5d847257a639..a51a6066079e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,6 +73,9 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 
+/* Active memory cgroup to use from an interrupt context */
+DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
+
 /* Socket memory accounting disabled? */
 static bool cgroup_memory_nosocket;
 
@@ -1069,26 +1072,43 @@ struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_page);
 
-/**
- * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
- */
-static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+static __always_inline struct mem_cgroup *active_memcg(void)
 {
-	if (memcg_kmem_bypass())
-		return NULL;
+	if (in_interrupt())
+		return this_cpu_read(int_active_memcg);
+	else
+		return current->active_memcg;
+}
 
-	if (unlikely(current->active_memcg)) {
-		struct mem_cgroup *memcg;
+static __always_inline struct mem_cgroup *get_active_memcg(void)
+{
+	struct mem_cgroup *memcg;
 
-		rcu_read_lock();
+	rcu_read_lock();
+	memcg = active_memcg();
+	if (memcg) {
 		/* current->active_memcg must hold a ref. */
-		if (WARN_ON_ONCE(!css_tryget(&current->active_memcg->css)))
+		if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
 			memcg = root_mem_cgroup;
 		else
 			memcg = current->active_memcg;
-		rcu_read_unlock();
-		return memcg;
 	}
+	rcu_read_unlock();
+
+	return memcg;
+}
+
+/**
+ * If active memcg is set, do not fallback to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+	if (memcg_kmem_bypass())
+		return NULL;
+
+	if (unlikely(active_memcg()))
+		return get_active_memcg();
+
 	return get_mem_cgroup_from_mm(current->mm);
 }
 
@@ -2920,8 +2940,8 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
 		return NULL;
 
 	rcu_read_lock();
-	if (unlikely(current->active_memcg))
-		memcg = rcu_dereference(current->active_memcg);
+	if (unlikely(active_memcg()))
+		memcg = active_memcg();
 	else
 		memcg = mem_cgroup_from_task(current);
 
-- 
2.26.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ