lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d84092a7-fc3d-4c3c-98b3-341d63a21b18@huaweicloud.com>
Date: Sat, 28 Jun 2025 11:09:39 +0800
From: Chen Ridong <chenridong@...weicloud.com>
To: Muchun Song <songmuchun@...edance.com>, hannes@...xchg.org,
 mhocko@...nel.org, roman.gushchin@...ux.dev, shakeel.butt@...ux.dev,
 muchun.song@...ux.dev, akpm@...ux-foundation.org, david@...morbit.com,
 zhengqi.arch@...edance.com, yosry.ahmed@...ux.dev, nphamcs@...il.com,
 chengming.zhou@...ux.dev
Cc: linux-kernel@...r.kernel.org, cgroups@...r.kernel.org,
 linux-mm@...ck.org, hamzamahfooz@...ux.microsoft.com,
 apais@...ux.microsoft.com
Subject: Re: [PATCH RFC 10/28] mm: memcontrol: return root object cgroup for
 root memory cgroup



On 2025/4/15 10:45, Muchun Song wrote:
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
> 
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
> 
> Signed-off-by: Muchun Song <songmuchun@...edance.com>
> ---
>  include/linux/memcontrol.h | 29 ++++++++++++++++++-------
>  mm/memcontrol.c            | 44 ++++++++++++++++++++------------------
>  mm/percpu.c                |  2 +-
>  3 files changed, 45 insertions(+), 30 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index bb4f203733f3..e74922d5755d 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -319,6 +319,7 @@ struct mem_cgroup {
>  #define MEMCG_CHARGE_BATCH 64U
>  
>  extern struct mem_cgroup *root_mem_cgroup;
> +extern struct obj_cgroup *root_obj_cgroup;
>  
>  enum page_memcg_data_flags {
>  	/* page->memcg_data is a pointer to an slabobj_ext vector */
> @@ -528,6 +529,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>  	return (memcg == root_mem_cgroup);
>  }
>  
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> +	return objcg == root_obj_cgroup;
> +}
> +
>  static inline bool mem_cgroup_disabled(void)
>  {
>  	return !cgroup_subsys_enabled(memory_cgrp_subsys);
> @@ -752,23 +758,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>  
>  static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>  {
> +	if (obj_cgroup_is_root(objcg))
> +		return true;
>  	return percpu_ref_tryget(&objcg->refcnt);
>  }
>  
> -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> +				       unsigned long nr)
>  {
> -	percpu_ref_get(&objcg->refcnt);
> +	if (!obj_cgroup_is_root(objcg))
> +		percpu_ref_get_many(&objcg->refcnt, nr);
>  }
>  
> -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> -				       unsigned long nr)
> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>  {
> -	percpu_ref_get_many(&objcg->refcnt, nr);
> +	obj_cgroup_get_many(objcg, 1);
>  }
>  
>  static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>  {
> -	if (objcg)
> +	if (objcg && !obj_cgroup_is_root(objcg))
>  		percpu_ref_put(&objcg->refcnt);
>  }
>  
> @@ -1101,6 +1110,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>  	return true;
>  }
>  
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> +	return true;
> +}
> +
>  static inline bool mem_cgroup_disabled(void)
>  {
>  	return true;
> @@ -1684,8 +1698,7 @@ static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>  {
>  	struct obj_cgroup *objcg = current_obj_cgroup();
>  
> -	if (objcg)
> -		obj_cgroup_get(objcg);
> +	obj_cgroup_get(objcg);
>  
>  	return objcg;
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index a6362d11b46c..4aadc1b87db3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -81,6 +81,7 @@ struct cgroup_subsys memory_cgrp_subsys __read_mostly;
>  EXPORT_SYMBOL(memory_cgrp_subsys);
>  
>  struct mem_cgroup *root_mem_cgroup __read_mostly;
> +struct obj_cgroup *root_obj_cgroup __read_mostly;
>  
>  /* Active memory cgroup to use from an interrupt context */
>  DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
> @@ -2525,15 +2526,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>  
>  static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>  {
> -	struct obj_cgroup *objcg = NULL;
> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> +		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>  
> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> -		objcg = rcu_dereference(memcg->objcg);
>  		if (likely(objcg && obj_cgroup_tryget(objcg)))
> -			break;
> -		objcg = NULL;
> +			return objcg;
>  	}
> -	return objcg;
> +
> +	return NULL;
>  }
>  

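It appears that the `return NULL` statement may be dead code here: the loop
now walks all the way up to the root memcg, and obj_cgroup_tryget() always
succeeds for the root object cgroup. Would it be preferable to return
root_obj_cgroup instead?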

Best regards,
Ridong

>  static struct obj_cgroup *current_objcg_update(void)
> @@ -2604,18 +2604,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>  		 * Objcg reference is kept by the task, so it's safe
>  		 * to use the objcg by the current task.
>  		 */
> -		return objcg;
> +		return objcg ? : root_obj_cgroup;
>  	}
>  
>  	memcg = this_cpu_read(int_active_memcg);
>  	if (unlikely(memcg))
>  		goto from_memcg;
>  
> -	return NULL;
> +	return root_obj_cgroup;
>  
>  from_memcg:
> -	objcg = NULL;
> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>  		/*
>  		 * Memcg pointer is protected by scope (see set_active_memcg())
>  		 * and is pinning the corresponding objcg, so objcg can't go
> @@ -2624,10 +2623,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>  		 */
>  		objcg = rcu_dereference_check(memcg->objcg, 1);
>  		if (likely(objcg))
> -			break;
> +			return objcg;
>  	}
>  
> -	return objcg;
> +	return root_obj_cgroup;
>  }
>  
>  struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> @@ -2641,14 +2640,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>  		objcg = __folio_objcg(folio);
>  		obj_cgroup_get(objcg);
>  	} else {
> -		struct mem_cgroup *memcg;
> -
>  		rcu_read_lock();
> -		memcg = __folio_memcg(folio);
> -		if (memcg)
> -			objcg = __get_obj_cgroup_from_memcg(memcg);
> -		else
> -			objcg = NULL;
> +		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>  		rcu_read_unlock();
>  	}
>  	return objcg;
> @@ -2733,7 +2726,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>  	int ret = 0;
>  
>  	objcg = current_obj_cgroup();
> -	if (objcg) {
> +	if (!obj_cgroup_is_root(objcg)) {
>  		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>  		if (!ret) {
>  			obj_cgroup_get(objcg);
> @@ -3036,7 +3029,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  	 * obj_cgroup_get() is used to get a permanent reference.
>  	 */
>  	objcg = current_obj_cgroup();
> -	if (!objcg)
> +	if (obj_cgroup_is_root(objcg))
>  		return true;
>  
>  	/*
> @@ -3708,6 +3701,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (!objcg)
>  		goto free_shrinker;
>  
> +	if (unlikely(mem_cgroup_is_root(memcg)))
> +		root_obj_cgroup = objcg;
> +
>  	objcg->memcg = memcg;
>  	rcu_assign_pointer(memcg->objcg, objcg);
>  	obj_cgroup_get(objcg);
> @@ -5302,6 +5298,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>  	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return;
>  
> +	if (obj_cgroup_is_root(objcg))
> +		return;
> +
>  	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>  
>  	/* PF_MEMALLOC context, charging must succeed */
> @@ -5329,6 +5328,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>  	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return;
>  
> +	if (obj_cgroup_is_root(objcg))
> +		return;
> +
>  	obj_cgroup_uncharge(objcg, size);
>  
>  	rcu_read_lock();
> diff --git a/mm/percpu.c b/mm/percpu.c
> index b35494c8ede2..3e54c6fca9bd 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>  		return true;
>  
>  	objcg = current_obj_cgroup();
> -	if (!objcg)
> +	if (obj_cgroup_is_root(objcg))
>  		return true;
>  
>  	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ