[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJuCfpHNhes_csqvm9-Z2f-C6XWuyRuXpchNtXwTSXxTpARZSg@mail.gmail.com>
Date: Tue, 28 Oct 2025 20:19:59 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: Harry Yoo <harry.yoo@...cle.com>
Cc: akpm@...ux-foundation.org, vbabka@...e.cz, andreyknvl@...il.com, 
	cl@...ux.com, dvyukov@...gle.com, glider@...gle.com, hannes@...xchg.org, 
	linux-mm@...ck.org, mhocko@...nel.org, muchun.song@...ux.dev, 
	rientjes@...gle.com, roman.gushchin@...ux.dev, ryabinin.a.a@...il.com, 
	shakeel.butt@...ux.dev, vincenzo.frascino@....com, yeoreum.yun@....com, 
	tytso@....edu, adilger.kernel@...ger.ca, linux-ext4@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH V3 7/7] mm/slab: place slabobj_ext metadata in unused
 space within s->size
On Mon, Oct 27, 2025 at 5:29 AM Harry Yoo <harry.yoo@...cle.com> wrote:
>
> When a cache has high s->align value and s->object_size is not aligned
> to it, each object ends up with some unused space because of alignment.
> If this wasted space is big enough, we can use it to store the
> slabobj_ext metadata instead of wasting it.
>
> On my system, this happens with caches like kmem_cache, mm_struct, pid,
> task_struct, sighand_cache, xfs_inode, and others.
>
> To place the slabobj_ext metadata within each object, the existing
> slab_obj_ext() logic can still be used by setting:
>
>   - slab->obj_exts = slab_address(slab) + s->red_left_pad +
>                      (slabobj_ext offset)
>   - stride = s->size
>
> slab_obj_ext() doesn't need to know where the metadata is stored,
> so this method works without adding extra overhead to slab_obj_ext().
>
> A good example benefiting from this optimization is xfs_inode
> (object_size: 992, align: 64). To measure memory savings, 2 million
> files were created on XFS.
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
>
> Before patch (creating 2M directories on xfs):
>   Slab:            6693844 kB
>   SReclaimable:    6016332 kB
>   SUnreclaim:       677512 kB
>
> After patch (creating 2M directories on xfs):
>   Slab:            6697572 kB
>   SReclaimable:    6034744 kB
>   SUnreclaim:       662828 kB (-14.3 MiB)
>
> Enjoy the memory savings!
>
> Suggested-by: Vlastimil Babka <vbabka@...e.cz>
> Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
> ---
>  include/linux/slab.h |  9 ++++++
>  mm/slab_common.c     |  6 ++--
>  mm/slub.c            | 72 ++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 82 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 561597dd2164..fd09674cc117 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -59,6 +59,9 @@ enum _slab_flag_bits {
>         _SLAB_CMPXCHG_DOUBLE,
>  #ifdef CONFIG_SLAB_OBJ_EXT
>         _SLAB_NO_OBJ_EXT,
> +#endif
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +       _SLAB_OBJ_EXT_IN_OBJ,
>  #endif
>         _SLAB_FLAGS_LAST_BIT
>  };
> @@ -244,6 +247,12 @@ enum _slab_flag_bits {
>  #define SLAB_NO_OBJ_EXT                __SLAB_FLAG_UNUSED
>  #endif
>
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +#define SLAB_OBJ_EXT_IN_OBJ    __SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
> +#else
> +#define SLAB_OBJ_EXT_IN_OBJ    __SLAB_FLAG_UNUSED
> +#endif
> +
>  /*
>   * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
>   *
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2c2ed2452271..bfe2f498e622 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -43,11 +43,13 @@ DEFINE_MUTEX(slab_mutex);
>  struct kmem_cache *kmem_cache;
>
>  /*
> - * Set of flags that will prevent slab merging
> + * Set of flags that will prevent slab merging.
> + * Any flag that adds per-object metadata should be included,
> + * since slab merging can update s->inuse that affects the metadata layout.
>   */
>  #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
>                 SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
> -               SLAB_FAILSLAB | SLAB_NO_MERGE)
> +               SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_OBJ_EXT_IN_OBJ)
>
>  #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
>                          SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> diff --git a/mm/slub.c b/mm/slub.c
> index 8101df5fdccf..7de6e8f8f8c2 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -970,6 +970,40 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
>  {
>         return false;
>  }
> +
> +#endif
> +
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +static bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +       return s->flags & SLAB_OBJ_EXT_IN_OBJ;
> +}
> +
> +static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +       unsigned int offset = get_info_end(s);
> +
> +       if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
> +               offset += sizeof(struct track) * 2;
> +
> +       if (slub_debug_orig_size(s))
> +               offset += ALIGN(sizeof(unsigned int),
> +                               __alignof__(unsigned long));
> +
> +       offset += kasan_metadata_size(s, false);
> +
> +       return offset;
> +}
> +#else
> +static inline bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +       return false;
> +}
> +
> +static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +       return 0;
> +}
>  #endif
>
>  #ifdef CONFIG_SLUB_DEBUG
> @@ -1270,6 +1304,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
>
>         off += kasan_metadata_size(s, false);
>
> +       if (obj_exts_in_object(s))
> +               off += sizeof(struct slabobj_ext);
> +
>         if (off != size_from_object(s))
>                 /* Beginning of the filler is the free pointer */
>                 print_section(KERN_ERR, "Padding  ", p + off,
> @@ -1439,7 +1476,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>   *     A. Free pointer (if we cannot overwrite object on free)
>   *     B. Tracking data for SLAB_STORE_USER
>   *     C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
> - *     D. Padding to reach required alignment boundary or at minimum
> + *     D. KASAN alloc metadata (KASAN enabled)
> + *     E. struct slabobj_ext to store accounting metadata
> + *        (SLAB_OBJ_EXT_IN_OBJ enabled)
> + *     F. Padding to reach required alignment boundary or at minimum
>   *             one word if debugging is on to be able to detect writes
>   *             before the word boundary.
>   *
> @@ -1468,6 +1508,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
>
>         off += kasan_metadata_size(s, false);
>
> +       if (obj_exts_in_object(s))
> +               off += sizeof(struct slabobj_ext);
> +
>         if (size_from_object(s) == off)
>                 return 1;
>
> @@ -2250,7 +2293,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
>         if (!obj_exts)
>                 return;
>
> -       if (obj_exts_in_slab(slab->slab_cache, slab)) {
> +       if (obj_exts_in_slab(slab->slab_cache, slab) ||
> +                       obj_exts_in_object(slab->slab_cache)) {
I think you need a check for obj_exts_in_object() inside
alloc_slab_obj_exts() to avoid allocating the vector.
>                 slab->obj_exts = 0;
>                 return;
>         }
> @@ -2291,6 +2335,21 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
>                 if (IS_ENABLED(CONFIG_MEMCG))
>                         slab->obj_exts |= MEMCG_DATA_OBJEXTS;
>                 slab_set_stride(slab, sizeof(struct slabobj_ext));
> +       } else if (obj_exts_in_object(s)) {
> +               unsigned int offset = obj_exts_offset_in_object(s);
> +
> +               slab->obj_exts = (unsigned long)slab_address(slab);
> +               slab->obj_exts += s->red_left_pad;
> +               slab->obj_exts += obj_exts_offset_in_object(s);
> +               if (IS_ENABLED(CONFIG_MEMCG))
> +                       slab->obj_exts |= MEMCG_DATA_OBJEXTS;
> +               slab_set_stride(slab, s->size);
> +
> +               for_each_object(addr, s, slab_address(slab), slab->objects) {
> +                       kasan_unpoison_range(addr + offset,
> +                                            sizeof(struct slabobj_ext));
> +                       memset(addr + offset, 0, sizeof(struct slabobj_ext));
> +               }
>         }
>         metadata_access_disable();
>  }
> @@ -7883,6 +7942,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>  {
>         slab_flags_t flags = s->flags;
>         unsigned int size = s->object_size;
> +       unsigned int aligned_size;
>         unsigned int order;
>
>         /*
> @@ -7997,7 +8057,13 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>          * offset 0. In order to align the objects we have to simply size
>          * each object to conform to the alignment.
>          */
> -       size = ALIGN(size, s->align);
> +       aligned_size = ALIGN(size, s->align);
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +       if (aligned_size - size >= sizeof(struct slabobj_ext))
> +               s->flags |= SLAB_OBJ_EXT_IN_OBJ;
> +#endif
> +       size = aligned_size;
> +
>         s->size = size;
>         s->reciprocal_size = reciprocal_value(size);
>         order = calculate_order(size);
> --
> 2.43.0
>
Powered by blists - more mailing lists
 
