Message-ID: <graryni6wadpi3ytfq7zimj2kbmm7dumxvhxtzmxndrv5s2y67@ju4cdnsmos6e>
Date: Tue, 23 Dec 2025 23:08:32 +0800
From: Hao Li <hao.li@...ux.dev>
To: Harry Yoo <harry.yoo@...cle.com>
Cc: akpm@...ux-foundation.org, vbabka@...e.cz, andreyknvl@...il.com,
cl@...two.org, dvyukov@...gle.com, glider@...gle.com, hannes@...xchg.org,
linux-mm@...ck.org, mhocko@...nel.org, muchun.song@...ux.dev, rientjes@...gle.com,
roman.gushchin@...ux.dev, ryabinin.a.a@...il.com, shakeel.butt@...ux.dev,
surenb@...gle.com, vincenzo.frascino@....com, yeoreum.yun@....com, tytso@....edu,
adilger.kernel@...ger.ca, linux-ext4@...r.kernel.org, linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org
Subject: Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext
array from leftover

On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> The leftover space in a slab is always smaller than s->size, and
> kmem caches for large objects that are not power-of-two sizes tend to have
> a greater amount of leftover space per slab. In some cases, the leftover
> space is larger than the size of the slabobj_ext array for the slab.
>
> An excellent example of such a cache is ext4_inode_cache. On my system,
> the object size is 1144, with a preferred order of 3, 28 objects per slab,
> and 736 bytes of leftover space per slab.
>
> Since the size of the slabobj_ext array is only 224 bytes (w/o mem
> profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
> fits within the leftover space.
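
As a quick sanity check of the arithmetic (assuming 4 KiB pages): an
order-3 slab is 8 * 4096 = 32768 bytes, 28 * 1144 = 32032 bytes are used
by objects, leaving 32768 - 32032 = 736 bytes of leftover, while the
array needs 28 * sizeof(struct slabobj_ext), i.e. 28 * 8 = 224 bytes
without memory allocation profiling or 28 * 16 = 448 bytes with it.
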
>
> Allocate the slabobj_exts array from this unused space instead of using
> kcalloc() when the space is large enough. The array is allocated from
> unused space only when creating new slabs; SLUB does not try to utilize
> the unused space when alloc_slab_obj_exts() is called after slab
> creation, because implementing such lazy allocation would require more
> expensive synchronization.
>
> The implementation and evaluation of lazy allocation from unused space
> are left as future work. As pointed out by Vlastimil Babka [1], it could
> be beneficial when a slab cache is created without SLAB_ACCOUNT but some
> of the allocations from the cache use __GFP_ACCOUNT. For example,
> xarray does that.
>
> To avoid unnecessary overhead when MEMCG (with SLAB_ACCOUNT) and
> MEM_ALLOC_PROFILING are not used for the cache, allocate the slabobj_ext
> array only when either of them is enabled.
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
>
> Before patch (creating ~2.64M directories on ext4):
> Slab: 4747880 kB
> SReclaimable: 4169652 kB
> SUnreclaim: 578228 kB
>
> After patch (creating ~2.64M directories on ext4):
> Slab: 4724020 kB
> SReclaimable: 4169188 kB
> SUnreclaim: 554832 kB (-22.84 MiB)
>
> Enjoy the memory savings!
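
As a rough cross-check of the numbers (assuming one ext4 inode per
directory and that the 224-byte kcalloc() previously fell into the
kmalloc-256 bucket): ~2.64M objects / 28 objects per slab is roughly
94k slabs, and 94k * 256 bytes is about 23 MiB, which lines up with the
measured 22.84 MiB drop in SUnreclaim.
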
>
> Link: https://lore.kernel.org/linux-mm/48029aab-20ea-4d90-bfd1-255592b2018e@suse.cz [1]
> Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
> ---
> mm/slub.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 151 insertions(+), 5 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 39c381cc1b2c..3fc3d2ca42e7 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -886,6 +886,99 @@ static inline unsigned long get_orig_size(struct kmem_cache *s, void *object)
> return *(unsigned long *)p;
> }
>
> +#ifdef CONFIG_SLAB_OBJ_EXT
> +
> +/*
> + * Check if memory cgroup or memory allocation profiling is enabled.
> + * If enabled, SLUB tries to reduce memory overhead of accounting
> + * slab objects. If neither is enabled when this function is called,
> + * the optimization is simply skipped to avoid affecting caches that do not
> + * need slabobj_ext metadata.
> + *
> + * However, this may skip the optimization for slabs created early in
> + * boot, before the memory cgroup and memory allocation profiling
> + * subsystems are initialized, even if they are enabled later.
> + */
> +static inline bool need_slab_obj_exts(struct kmem_cache *s)
> +{
> + if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
> + return true;
> +
> + if (mem_alloc_profiling_enabled())
> + return true;
> +
> + return false;
> +}
> +
> +static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
> +{
> + return sizeof(struct slabobj_ext) * slab->objects;
> +}
> +
> +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + unsigned long objext_offset;
> +
> + objext_offset = s->red_left_pad + s->size * slab->objects;

Hi Harry,

As s->size already includes s->red_left_pad, do we still need
s->red_left_pad here?

> + objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
> + return objext_offset;
> +}
> +
> +static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + unsigned long objext_offset = obj_exts_offset_in_slab(s, slab);
> + unsigned long objext_size = obj_exts_size_in_slab(slab);
> +
> + return objext_offset + objext_size <= slab_size(slab);
> +}
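
Plugging the ext4_inode_cache example into this check (assuming red
zoning is off, so s->red_left_pad is 0, and an 8-byte struct
slabobj_ext): objext_offset = 28 * 1144 = 32032, which is already
8-byte aligned, and 32032 + 224 = 32256 <= 32768, so the array fits in
the leftover space.
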
> +
> +static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
> +{
> + unsigned long expected;
> + unsigned long obj_exts;
> +
> + obj_exts = slab_obj_exts(slab);
> + if (!obj_exts)
> + return false;
> +
> + if (!obj_exts_fit_within_slab_leftover(s, slab))
> + return false;
> +
> + expected = (unsigned long)slab_address(slab);
> + expected += obj_exts_offset_in_slab(s, slab);
> + return obj_exts == expected;
> +}
> +#else
> +static inline bool need_slab_obj_exts(struct kmem_cache *s)
> +{
> + return false;
> +}
> +
> +static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
> +{
> + return 0;
> +}
> +
> +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + return 0;
> +}
> +
> +static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
> + struct slab *slab)
> +{
> + return false;
> +}
> +
> +static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
> +{
> + return false;
> +}
> +#endif
> +
> #ifdef CONFIG_SLUB_DEBUG
>
> /*
> @@ -1405,7 +1498,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
> start = slab_address(slab);
> length = slab_size(slab);
> end = start + length;
> - remainder = length % s->size;
> +
> + if (obj_exts_in_slab(s, slab)) {
> + remainder = length;
> + remainder -= obj_exts_offset_in_slab(s, slab);
> + remainder -= obj_exts_size_in_slab(slab);
> + } else {
> + remainder = length % s->size;
> + }
> +
> if (!remainder)
> return;
>
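
With the in-slab array from the ext4_inode_cache example above, this
gives remainder = 32768 - 32032 - 224 = 512, so only the bytes after
the slabobj_ext array are treated as padding and checked for poisoning.
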
> @@ -2179,6 +2280,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
> return;
> }
>
> + if (obj_exts_in_slab(slab->slab_cache, slab)) {
> + slab->obj_exts = 0;
> + return;
> + }
> +
> /*
> * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
> * corresponding extension will be NULL. alloc_tag_sub() will throw a
> @@ -2194,6 +2300,35 @@ static inline void free_slab_obj_exts(struct slab *slab)
> slab->obj_exts = 0;
> }
>
> +/*
> + * Try to allocate slabobj_ext array from unused space.
> + * This function must be called on a freshly allocated slab to prevent
> + * concurrency problems.
> + */
> +static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
> +{
> + void *addr;
> + unsigned long obj_exts;
> +
> + if (!need_slab_obj_exts(s))
> + return;
> +
> + if (obj_exts_fit_within_slab_leftover(s, slab)) {
> + addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
> + addr = kasan_reset_tag(addr);
> + obj_exts = (unsigned long)addr;
> +
> + get_slab_obj_exts(obj_exts);
> + memset(addr, 0, obj_exts_size_in_slab(slab));
> + put_slab_obj_exts(obj_exts);
> +
> + if (IS_ENABLED(CONFIG_MEMCG))
> + obj_exts |= MEMCG_DATA_OBJEXTS;
> + slab->obj_exts = obj_exts;
> + slab_set_stride(slab, sizeof(struct slabobj_ext));
> + }
> +}
> +
> #else /* CONFIG_SLAB_OBJ_EXT */
>
> static inline void init_slab_obj_exts(struct slab *slab)
> @@ -2210,6 +2345,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
> {
> }
>
> +static inline void alloc_slab_obj_exts_early(struct kmem_cache *s,
> + struct slab *slab)
> +{
> +}
> +
> #endif /* CONFIG_SLAB_OBJ_EXT */
>
> #ifdef CONFIG_MEM_ALLOC_PROFILING
> @@ -3206,7 +3346,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
> static __always_inline void account_slab(struct slab *slab, int order,
> struct kmem_cache *s, gfp_t gfp)
> {
> - if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
> + if (memcg_kmem_online() &&
> + (s->flags & SLAB_ACCOUNT) &&
> + !slab_obj_exts(slab))
> alloc_slab_obj_exts(slab, s, gfp, true);
>
> mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
> @@ -3270,9 +3412,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> slab->objects = oo_objects(oo);
> slab->inuse = 0;
> slab->frozen = 0;
> - init_slab_obj_exts(slab);
> -
> - account_slab(slab, oo_order(oo), s, flags);
>
> slab->slab_cache = s;
>
> @@ -3281,6 +3420,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> start = slab_address(slab);
>
> setup_slab_debug(s, slab, start);
> + init_slab_obj_exts(slab);
> + /*
> + * Poison the slab before initializing the slabobj_ext array
> + * to prevent the array from being overwritten.
> + */
> + alloc_slab_obj_exts_early(s, slab);
> + account_slab(slab, oo_order(oo), s, flags);
>
> shuffle = shuffle_freelist(s, slab);
>
> --
> 2.43.0
>