lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a055507f-9c4d-4fb1-b5a6-bda9c2dc20ac@linux.dev>
Date: Fri, 26 Dec 2025 11:10:11 +0800
From: Muchun Song <muchun.song@...ux.dev>
To: Chen Ridong <chenridong@...weicloud.com>
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org,
 cgroups@...r.kernel.org, Muchun Song <songmuchun@...edance.com>,
 Qi Zheng <zhengqi.arch@...edance.com>, Qi Zheng <qi.zheng@...ux.dev>,
 hannes@...xchg.org, hughd@...gle.com, mhocko@...e.com,
 roman.gushchin@...ux.dev, shakeel.butt@...ux.dev, david@...nel.org,
 lorenzo.stoakes@...cle.com, ziy@...dia.com, harry.yoo@...cle.com,
 imran.f.khan@...cle.com, kamalesh.babulal@...cle.com,
 axelrasmussen@...gle.com, yuanchu@...gle.com, weixugc@...gle.com,
 mkoutny@...e.com, akpm@...ux-foundation.org,
 hamzamahfooz@...ux.microsoft.com, apais@...ux.microsoft.com,
 lance.yang@...ux.dev
Subject: Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for
 root memory cgroup



On 2025/12/26 09:03, Chen Ridong wrote:
>
> On 2025/12/17 15:27, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@...edance.com>
>>
>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>> even for the root memory cgroup. In contrast, the situation for
>> object cgroups has been different.
>>
>> Previously, the root object cgroup couldn't be returned because
>> it didn't exist. Now that a valid root object cgroup exists, for
>> the sake of consistency, it's necessary to align the behavior of
>> object-cgroup-related operations with that of memory cgroup APIs.
>>
>> Signed-off-by: Muchun Song <songmuchun@...edance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@...edance.com>
>> ---
>>   include/linux/memcontrol.h | 26 +++++++++++++++++-----
>>   mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
>>   mm/percpu.c                |  2 +-
>>   3 files changed, 45 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 288dd6337f80f..776d9be1f446a 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>   #define MEMCG_CHARGE_BATCH 64U
>>   
>>   extern struct mem_cgroup *root_mem_cgroup;
>> +extern struct obj_cgroup *root_obj_cgroup;
>>   
>>   enum page_memcg_data_flags {
>>   	/* page->memcg_data is a pointer to an slabobj_ext vector */
>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>   	return (memcg == root_mem_cgroup);
>>   }
>>   
>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> +{
>> +	return objcg == root_obj_cgroup;
>> +}
>> +
>>   static inline bool mem_cgroup_disabled(void)
>>   {
>>   	return !cgroup_subsys_enabled(memory_cgrp_subsys);
>> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>>   
>>   static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>>   {
>> +	if (obj_cgroup_is_root(objcg))
>> +		return true;
>>   	return percpu_ref_tryget(&objcg->refcnt);
>>   }
>>   
>> -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>> +				       unsigned long nr)
>>   {
>> -	percpu_ref_get(&objcg->refcnt);
>> +	if (!obj_cgroup_is_root(objcg))
>> +		percpu_ref_get_many(&objcg->refcnt, nr);
>>   }
>>   
>> -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>> -				       unsigned long nr)
>> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>   {
>> -	percpu_ref_get_many(&objcg->refcnt, nr);
>> +	obj_cgroup_get_many(objcg, 1);
>>   }
>>   
>>   static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>>   {
>> -	if (objcg)
>> +	if (objcg && !obj_cgroup_is_root(objcg))
>>   		percpu_ref_put(&objcg->refcnt);
>>   }
>>   
>> @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>   	return true;
>>   }
>>   
>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> +{
>> +	return true;
>> +}
>> +
>>   static inline bool mem_cgroup_disabled(void)
>>   {
>>   	return true;
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 544b3200db12d..21b5aad34cae7 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
>>   struct mem_cgroup *root_mem_cgroup __read_mostly;
>>   EXPORT_SYMBOL(root_mem_cgroup);
>>   
>> +struct obj_cgroup *root_obj_cgroup __read_mostly;
>> +
>>   /* Active memory cgroup to use from an interrupt context */
>>   DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>>   EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
>> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>>   
>>   static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>>   {
>> -	struct obj_cgroup *objcg = NULL;
>> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>> +		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>>   
>> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>> -		objcg = rcu_dereference(memcg->objcg);
>>   		if (likely(objcg && obj_cgroup_tryget(objcg)))
>> -			break;
>> -		objcg = NULL;
>> +			return objcg;
>>   	}
>> -	return objcg;
>> +
>> +	return NULL;
>>   }
>>   
>>   static struct obj_cgroup *current_objcg_update(void)
>> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>   		 * Objcg reference is kept by the task, so it's safe
>>   		 * to use the objcg by the current task.
>>   		 */
>> -		return objcg;
>> +		return objcg ? : root_obj_cgroup;
>>   	}
>>   
>>   	memcg = this_cpu_read(int_active_memcg);
>>   	if (unlikely(memcg))
>>   		goto from_memcg;
>>   
>> -	return NULL;
>> +	return root_obj_cgroup;
>>   
>>   from_memcg:
>> -	objcg = NULL;
>> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>   		/*
>>   		 * Memcg pointer is protected by scope (see set_active_memcg())
>>   		 * and is pinning the corresponding objcg, so objcg can't go
>> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>   		 */
>>   		objcg = rcu_dereference_check(memcg->objcg, 1);
>>   		if (likely(objcg))
>> -			break;
>> +			return objcg;
>>   	}
>>   
>> -	return objcg;
>> +	return root_obj_cgroup;
>>   }
>>   
>>   struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>   		objcg = __folio_objcg(folio);
>>   		obj_cgroup_get(objcg);
>>   	} else {
>> -		struct mem_cgroup *memcg;
>> -
>>   		rcu_read_lock();
>> -		memcg = __folio_memcg(folio);
>> -		if (memcg)
>> -			objcg = __get_obj_cgroup_from_memcg(memcg);
>> -		else
>> -			objcg = NULL;
>> +		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>>   		rcu_read_unlock();
>>   	}
>>   	return objcg;
>> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>   	int ret = 0;
>>   
>>   	objcg = current_obj_cgroup();
>> -	if (objcg) {
>> +	if (objcg && !obj_cgroup_is_root(objcg)) {
>>   		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>>   		if (!ret) {
>>   			obj_cgroup_get(objcg);
>> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>>   	 * obj_cgroup_get() is used to get a permanent reference.
>>   	 */
>>   	objcg = current_obj_cgroup();
>> -	if (!objcg)
>> +	if (!objcg || obj_cgroup_is_root(objcg))
>>   		return true;
>>   
>>   	/*
>> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>   	if (!objcg)
>>   		goto free_shrinker;
>>   
>> +	if (unlikely(mem_cgroup_is_root(memcg)))
>> +		root_obj_cgroup = objcg;
>> +
>>   	objcg->memcg = memcg;
>>   	rcu_assign_pointer(memcg->objcg, objcg);
>>   	obj_cgroup_get(objcg);
>> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>>   	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>   		return;
>>   
>> +	if (obj_cgroup_is_root(objcg))
>> +		return;
>> +
>>   	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>>   
>>   	/* PF_MEMALLOC context, charging must succeed */
>> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>>   	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>   		return;
>>   
>> +	if (obj_cgroup_is_root(objcg))
>> +		return;
>> +
>>   	obj_cgroup_uncharge(objcg, size);
>>   
> If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
> remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
> cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
> this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?

I'm not sure I fully understand the problem—how could this happen, given 
that
obj_cgroup_charge_zswap also checks for the root objcg?

Muchun,
Thanks.
>
>>   	rcu_read_lock();
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index 81462ce5866e1..5c1a9b77d6b93 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>>   		return true;
>>   
>>   	objcg = current_obj_cgroup();
>> -	if (!objcg)
>> +	if (!objcg || obj_cgroup_is_root(objcg))
>>   		return true;
>>   
>>   	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ