lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 17 Aug 2022 10:42:40 +0300
From:   Vasily Averin <vvs@...nvz.org>
To:     Roman Gushchin <roman.gushchin@...ux.dev>, mkoutny@...e.com,
        tj@...nel.org
Cc:     gregkh@...uxfoundation.org, hannes@...xchg.org, kernel@...nvz.org,
        linux-kernel@...r.kernel.org, mhocko@...e.com, shakeelb@...gle.com,
        songmuchun@...edance.com, viro@...iv.linux.org.uk
Subject: [RFC PATCH] memcg: adjust memcg for new cgroup allocations

This is the first of the patches announced here:
https://lore.kernel.org/all/62188f37-f816-08e9-cdd5-8df23131746d@openvz.org/
---
Usually, accounted allocations use the memory cgroup of the current process.
In the case of cgroups this looks incorrect: a memory cgroup can be created
both from the host and from inside a container.  At the same time,
it is intuitively expected that both of these operations should lead
to the same result.
This issue was addressed by Roman Gushchin in commit 3e38e0aaca9e
"mm: memcg: charge memcg percpu memory to the parent cgroup".
He adjusted the memcg for allocations made inside mem_cgroup_css_alloc().

However, now we want to enable accounting for some other cgroup-related
resources allocated from cgroup_mkdir(). We would like to guarantee that
all new accounted allocations will be charged to the same memory cgroup.

This patch moves the memcg adjustment from mem_cgroup_css_alloc() to its
callers: cgroup_mkdir() and cgroup_apply_control(). In turn, this
requires creating a new proxy function, mem_cgroup_from_cgroup().

Signed-off-by: Vasily Averin <vvs@...nvz.org>
---
 include/linux/memcontrol.h | 18 ++++++++++++++++++
 kernel/cgroup/cgroup.c     | 22 ++++++++++++++++------
 mm/memcontrol.c            |  4 +---
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d31ce55b1c0..342426d1edbf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1813,6 +1813,19 @@ static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
 {
 	return memcg ? memcg : root_mem_cgroup;
 }
+
+static inline struct mem_cgroup *mem_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg = NULL;
+
+	css = cgroup_get_e_css(cgroup, &memory_cgrp_subsys);
+
+	if (css)
+		memcg = container_of(css, struct mem_cgroup, css);
+
+	return mem_cgroup_or_root(memcg);
+}
 #else
 static inline bool mem_cgroup_kmem_disabled(void)
 {
@@ -1878,6 +1891,11 @@ static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
 {
 	return NULL;
 }
+
+static inline struct mem_cgroup *mem_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+	return NULL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6373f1..544d93a8878f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3247,13 +3247,16 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp)
  */
 static int cgroup_apply_control(struct cgroup *cgrp)
 {
+	struct mem_cgroup *memcg, *old_memcg;
 	int ret;
 
 	cgroup_propagate_control(cgrp);
 
+	memcg = mem_cgroup_from_cgroup(cgrp);
+	old_memcg = set_active_memcg(memcg);
 	ret = cgroup_apply_control_enable(cgrp);
 	if (ret)
-		return ret;
+		goto out_memcg;
 
 	/*
 	 * At this point, cgroup_e_css_by_mask() results reflect the new csses
@@ -3261,10 +3264,11 @@ static int cgroup_apply_control(struct cgroup *cgrp)
 	 * css associations of all tasks in the subtree.
 	 */
 	ret = cgroup_update_dfl_csses(cgrp);
-	if (ret)
-		return ret;
 
-	return 0;
+out_memcg:
+	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
+	return ret;
 }
 
 /**
@@ -5532,6 +5536,7 @@ static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
 int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
 {
 	struct cgroup *parent, *cgrp;
+	struct mem_cgroup *memcg, *old_memcg;
 	int ret;
 
 	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
@@ -5547,10 +5552,12 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
 		goto out_unlock;
 	}
 
+	memcg = mem_cgroup_from_cgroup(parent);
+	old_memcg = set_active_memcg(memcg);
 	cgrp = cgroup_create(parent, name, mode);
 	if (IS_ERR(cgrp)) {
 		ret = PTR_ERR(cgrp);
-		goto out_unlock;
+		goto out_memcg;
 	}
 
 	/*
@@ -5577,10 +5584,13 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
 	kernfs_activate(cgrp->kn);
 
 	ret = 0;
-	goto out_unlock;
+	goto out_memcg;
 
 out_destroy:
 	cgroup_destroy_locked(cgrp);
+out_memcg:
+	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
 out_unlock:
 	cgroup_kn_unlock(parent_kn);
 	return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b69979c9ced5..e170c64e66e2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5239,11 +5239,9 @@ static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
-	struct mem_cgroup *memcg, *old_memcg;
+	struct mem_cgroup *memcg;
 
-	old_memcg = set_active_memcg(parent);
 	memcg = mem_cgroup_alloc();
-	set_active_memcg(old_memcg);
 	if (IS_ERR(memcg))
 		return ERR_CAST(memcg);
 
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ