[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMZfGtX_Ft=OpThZC0vp2TdXxymK-AV6HTyinVhRBJrk6ZkUfA@mail.gmail.com>
Date: Mon, 30 Jun 2025 15:16:18 +0800
From: Muchun Song <songmuchun@...edance.com>
To: Chen Ridong <chenridong@...weicloud.com>
Cc: Johannes Weiner <hannes@...xchg.org>, Michal Hocko <mhocko@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>, Shakeel Butt <shakeel.butt@...ux.dev>, muchun.song@...ux.dev,
Andrew Morton <akpm@...ux-foundation.org>, Dave Chinner <david@...morbit.com>,
Qi Zheng <zhengqi.arch@...edance.com>, yosry.ahmed@...ux.dev,
Nhat Pham <nphamcs@...il.com>, chengming.zhou@...ux.dev,
LKML <linux-kernel@...r.kernel.org>, Cgroups <cgroups@...r.kernel.org>,
Linux Memory Management List <linux-mm@...ck.org>, hamzamahfooz@...ux.microsoft.com,
apais@...ux.microsoft.com
Subject: Re: Re: [PATCH RFC 10/28] mm: memcontrol: return root object cgroup
for root memory cgroup
On Sat, Jun 28, 2025 at 11:09 AM Chen Ridong <chenridong@...weicloud.com> wrote:
>
>
>
> On 2025/4/15 10:45, Muchun Song wrote:
> > Memory cgroup functions such as get_mem_cgroup_from_folio() and
> > get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> > even for the root memory cgroup. In contrast, the situation for
> > object cgroups has been different.
> >
> > Previously, the root object cgroup couldn't be returned because
> > it didn't exist. Now that a valid root object cgroup exists, for
> > the sake of consistency, it's necessary to align the behavior of
> > object-cgroup-related operations with that of memory cgroup APIs.
> >
> > Signed-off-by: Muchun Song <songmuchun@...edance.com>
> > ---
> > include/linux/memcontrol.h | 29 ++++++++++++++++++-------
> > mm/memcontrol.c | 44 ++++++++++++++++++++------------------
> > mm/percpu.c | 2 +-
> > 3 files changed, 45 insertions(+), 30 deletions(-)
> >
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index bb4f203733f3..e74922d5755d 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -319,6 +319,7 @@ struct mem_cgroup {
> > #define MEMCG_CHARGE_BATCH 64U
> >
> > extern struct mem_cgroup *root_mem_cgroup;
> > +extern struct obj_cgroup *root_obj_cgroup;
> >
> > enum page_memcg_data_flags {
> > /* page->memcg_data is a pointer to an slabobj_ext vector */
> > @@ -528,6 +529,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> > return (memcg == root_mem_cgroup);
> > }
> >
> > +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> > +{
> > + return objcg == root_obj_cgroup;
> > +}
> > +
> > static inline bool mem_cgroup_disabled(void)
> > {
> > return !cgroup_subsys_enabled(memory_cgrp_subsys);
> > @@ -752,23 +758,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
> >
> > static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
> > {
> > + if (obj_cgroup_is_root(objcg))
> > + return true;
> > return percpu_ref_tryget(&objcg->refcnt);
> > }
> >
> > -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> > +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> > + unsigned long nr)
> > {
> > - percpu_ref_get(&objcg->refcnt);
> > + if (!obj_cgroup_is_root(objcg))
> > + percpu_ref_get_many(&objcg->refcnt, nr);
> > }
> >
> > -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> > - unsigned long nr)
> > +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> > {
> > - percpu_ref_get_many(&objcg->refcnt, nr);
> > + obj_cgroup_get_many(objcg, 1);
> > }
> >
> > static inline void obj_cgroup_put(struct obj_cgroup *objcg)
> > {
> > - if (objcg)
> > + if (objcg && !obj_cgroup_is_root(objcg))
> > percpu_ref_put(&objcg->refcnt);
> > }
> >
> > @@ -1101,6 +1110,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> > return true;
> > }
> >
> > +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> > +{
> > + return true;
> > +}
> > +
> > static inline bool mem_cgroup_disabled(void)
> > {
> > return true;
> > @@ -1684,8 +1698,7 @@ static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
> > {
> > struct obj_cgroup *objcg = current_obj_cgroup();
> >
> > - if (objcg)
> > - obj_cgroup_get(objcg);
> > + obj_cgroup_get(objcg);
> >
> > return objcg;
> > }
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index a6362d11b46c..4aadc1b87db3 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -81,6 +81,7 @@ struct cgroup_subsys memory_cgrp_subsys __read_mostly;
> > EXPORT_SYMBOL(memory_cgrp_subsys);
> >
> > struct mem_cgroup *root_mem_cgroup __read_mostly;
> > +struct obj_cgroup *root_obj_cgroup __read_mostly;
> >
> > /* Active memory cgroup to use from an interrupt context */
> > DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
> > @@ -2525,15 +2526,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
> >
> > static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
> > {
> > - struct obj_cgroup *objcg = NULL;
> > + for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> > + struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
> >
> > - for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> > - objcg = rcu_dereference(memcg->objcg);
> > if (likely(objcg && obj_cgroup_tryget(objcg)))
> > - break;
> > - objcg = NULL;
> > + return objcg;
> > }
> > - return objcg;
> > +
> > + return NULL;
> > }
> >
>
> It appears that the return NULL statement might be dead code in this
> context. Would it be preferable to return root_obj_cgroup instead?
I do not think so. The @memcg parameter can be NULL when this function is
called from current_objcg_update(), so the return NULL statement is reachable.
Returning NULL in that case makes sense to me; returning root_obj_cgroup for
a NULL memcg does not seem reasonable.
Muchun,
Thanks.
>
> Best regards,
> Ridong
>
> > static struct obj_cgroup *current_objcg_update(void)
> > @@ -2604,18 +2604,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
> > * Objcg reference is kept by the task, so it's safe
> > * to use the objcg by the current task.
> > */
> > - return objcg;
> > + return objcg ? : root_obj_cgroup;
> > }
> >
> > memcg = this_cpu_read(int_active_memcg);
> > if (unlikely(memcg))
> > goto from_memcg;
> >
> > - return NULL;
> > + return root_obj_cgroup;
> >
> > from_memcg:
> > - objcg = NULL;
> > - for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> > + for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> > /*
> > * Memcg pointer is protected by scope (see set_active_memcg())
> > * and is pinning the corresponding objcg, so objcg can't go
> > @@ -2624,10 +2623,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
> > */
> > objcg = rcu_dereference_check(memcg->objcg, 1);
> > if (likely(objcg))
> > - break;
> > + return objcg;
> > }
> >
> > - return objcg;
> > + return root_obj_cgroup;
> > }
> >
> > struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> > @@ -2641,14 +2640,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> > objcg = __folio_objcg(folio);
> > obj_cgroup_get(objcg);
> > } else {
> > - struct mem_cgroup *memcg;
> > -
> > rcu_read_lock();
> > - memcg = __folio_memcg(folio);
> > - if (memcg)
> > - objcg = __get_obj_cgroup_from_memcg(memcg);
> > - else
> > - objcg = NULL;
> > + objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
> > rcu_read_unlock();
> > }
> > return objcg;
> > @@ -2733,7 +2726,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> > int ret = 0;
> >
> > objcg = current_obj_cgroup();
> > - if (objcg) {
> > + if (!obj_cgroup_is_root(objcg)) {
> > ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
> > if (!ret) {
> > obj_cgroup_get(objcg);
> > @@ -3036,7 +3029,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> > * obj_cgroup_get() is used to get a permanent reference.
> > */
> > objcg = current_obj_cgroup();
> > - if (!objcg)
> > + if (obj_cgroup_is_root(objcg))
> > return true;
> >
> > /*
> > @@ -3708,6 +3701,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
> > if (!objcg)
> > goto free_shrinker;
> >
> > + if (unlikely(mem_cgroup_is_root(memcg)))
> > + root_obj_cgroup = objcg;
> > +
> > objcg->memcg = memcg;
> > rcu_assign_pointer(memcg->objcg, objcg);
> > obj_cgroup_get(objcg);
> > @@ -5302,6 +5298,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
> > if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> > return;
> >
> > + if (obj_cgroup_is_root(objcg))
> > + return;
> > +
> > VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
> >
> > /* PF_MEMALLOC context, charging must succeed */
> > @@ -5329,6 +5328,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
> > if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> > return;
> >
> > + if (obj_cgroup_is_root(objcg))
> > + return;
> > +
> > obj_cgroup_uncharge(objcg, size);
> >
> > rcu_read_lock();
> > diff --git a/mm/percpu.c b/mm/percpu.c
> > index b35494c8ede2..3e54c6fca9bd 100644
> > --- a/mm/percpu.c
> > +++ b/mm/percpu.c
> > @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
> > return true;
> >
> > objcg = current_obj_cgroup();
> > - if (!objcg)
> > + if (obj_cgroup_is_root(objcg))
> > return true;
> >
> > if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
>
Powered by blists - more mailing lists