lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220709002858.129534-1-roman.gushchin@linux.dev>
Date:   Fri,  8 Jul 2022 17:28:58 -0700
From:   Roman Gushchin <roman.gushchin@...ux.dev>
To:     bpf@...r.kernel.org
Cc:     Shakeel Butt <shakeelb@...gle.com>,
        Alexei Starovoitov <ast@...nel.org>,
        linux-kernel@...r.kernel.org,
        Roman Gushchin <roman.gushchin@...ux.dev>
Subject: [PATCH bpf-next] bpf: reparent bpf maps on memcg offlining

The memory consumed by a mpf map is always accounted to the memory
cgroup of the process which created the map. The map can outlive
the memory cgroup if it's used by processes in other cgroups or
is pinned on bpffs. In this case the map pins the original cgroup
in the dying state.

For other types of objects (slab objects, non-slab kernel allocations,
percpu objects and recently LRU pages) there is a reparenting process
implemented: on cgroup offlining charged objects are getting
reassigned to the parent cgroup. Because all charges and statistics
are fully recursive it's a fairly cheap operation.

For efficiency and consistency with other types of objects, let's do
the same for bpf maps. Fortunately thanks to the objcg API, the
required changes are minimal.

Please, note that individual allocations (slabs, percpu and large
kmallocs) already have the reparenting mechanism. This commit adds
it to the saved map->memcg pointer by replacing it to map->objcg.
Because dying cgroups are not visible for a user and all charges are
recursive, this commit doesn't bring any behavior changes for a user.

Signed-off-by: Roman Gushchin <roman.gushchin@...ux.dev>
---
 include/linux/bpf.h  |  2 +-
 kernel/bpf/syscall.c | 35 +++++++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b21f2a3452f..85a4db3e0536 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -221,7 +221,7 @@ struct bpf_map {
 	u32 btf_vmlinux_value_type_id;
 	struct btf *btf;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 #endif
 	char name[BPF_OBJ_NAME_LEN];
 	struct bpf_map_off_arr *off_arr;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ab688d85b2c6..432cbbc74ba6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -419,35 +419,52 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 #ifdef CONFIG_MEMCG_KMEM
 static void bpf_map_save_memcg(struct bpf_map *map)
 {
-	map->memcg = get_mem_cgroup_from_mm(current->mm);
+	/* Currently if a map is created by a process belonging to the root
+	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
+	 * So we have to check map->objcg for being NULL each time it's
+	 * being used.
+	 */
+	map->objcg = get_obj_cgroup_from_current();
 }
 
 static void bpf_map_release_memcg(struct bpf_map *map)
 {
-	mem_cgroup_put(map->memcg);
+	if (map->objcg)
+		obj_cgroup_put(map->objcg);
+}
+
+static struct mem_cgroup *bpf_map_get_memcg(struct bpf_map *map) {
+	if (map->objcg)
+		return get_mem_cgroup_from_objcg(map->objcg);
+
+	return root_mem_cgroup;
 }
 
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node)
 {
-	struct mem_cgroup *old_memcg;
+	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	old_memcg = set_active_memcg(map->memcg);
+	memcg = bpf_map_get_memcg(map);
+	old_memcg = set_active_memcg(memcg);
 	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
 	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
 
 	return ptr;
 }
 
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 {
-	struct mem_cgroup *old_memcg;
+	struct mem_cgroup *memcg, *old_memcg;
 	void *ptr;
 
-	old_memcg = set_active_memcg(map->memcg);
+	memcg = bpf_map_get_memcg(map);
+	old_memcg = set_active_memcg(memcg);
 	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
 	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
 
 	return ptr;
 }
@@ -455,12 +472,14 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 				    size_t align, gfp_t flags)
 {
-	struct mem_cgroup *old_memcg;
+	struct mem_cgroup *memcg, *old_memcg;
 	void __percpu *ptr;
 
-	old_memcg = set_active_memcg(map->memcg);
+	memcg = bpf_map_get_memcg(map);
+	old_memcg = set_active_memcg(memcg);
 	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
 	set_active_memcg(old_memcg);
+	mem_cgroup_put(memcg);
 
 	return ptr;
 }
-- 
2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ