Message-ID: <CAJuCfpGY0h2d6VEAEa4kjH2yUMGDdke_QTFt6d+gb+kH=rnXyQ@mail.gmail.com>
Date: Tue, 28 Oct 2025 20:07:42 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: Harry Yoo <harry.yoo@...cle.com>
Cc: akpm@...ux-foundation.org, vbabka@...e.cz, andreyknvl@...il.com, 
	cl@...ux.com, dvyukov@...gle.com, glider@...gle.com, hannes@...xchg.org, 
	linux-mm@...ck.org, mhocko@...nel.org, muchun.song@...ux.dev, 
	rientjes@...gle.com, roman.gushchin@...ux.dev, ryabinin.a.a@...il.com, 
	shakeel.butt@...ux.dev, vincenzo.frascino@....com, yeoreum.yun@....com, 
	tytso@....edu, adilger.kernel@...ger.ca, linux-ext4@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH V3 6/7] mm/slab: save memory by allocating slabobj_ext
 array from leftover

On Mon, Oct 27, 2025 at 5:29 AM Harry Yoo <harry.yoo@...cle.com> wrote:
>
> The leftover space in a slab is always smaller than s->size, and
> kmem caches for large objects that are not power-of-two sizes tend to have
> a greater amount of leftover space per slab. In some cases, the leftover
> space is larger than the size of the slabobj_ext array for the slab.
>
> An excellent example of such a cache is ext4_inode_cache. On my system,
> the object size is 1144, with a preferred order of 3, 28 objects per slab,
> and 736 bytes of leftover space per slab.
>
> Since the size of the slabobj_ext array is only 224 bytes (w/o mem
> profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
> fits within the leftover space.
>
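
(Sanity-checking those numbers, assuming 4 KiB pages: an order-3 slab is
8 * 4096 = 32768 bytes, and 28 * 1144 = 32032 bytes of objects leaves
32768 - 32032 = 736 bytes. The array needs 28 * sizeof(struct
slabobj_ext), i.e. 28 * 8 = 224 bytes without mem profiling or
28 * 16 = 448 bytes with it, so it fits either way.)
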
> Allocate the slabobj_exts array from this unused space instead of using
> kcalloc(), when the leftover space is large enough. The array is always
> allocated when creating new slabs, because implementing lazy allocation
> correctly is difficult without expensive synchronization.
>
> To avoid unnecessary overhead when MEMCG (with SLAB_ACCOUNT) and
> MEM_ALLOC_PROFILING are not used for the cache, allocate the
> slabobj_ext array only when either of them is enabled at the time
> the slabs are created.
>
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
>
> Before patch (creating 2M directories on ext4):
>   Slab:            3575348 kB
>   SReclaimable:    3137804 kB
>   SUnreclaim:       437544 kB
>
> After patch (creating 2M directories on ext4):
>   Slab:            3558236 kB
>   SReclaimable:    3139268 kB
>   SUnreclaim:       418968 kB (-18.14 MiB)
>
> Enjoy the memory savings!
>
> Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
> ---
>  mm/slub.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 142 insertions(+), 5 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 13acc9437ef5..8101df5fdccf 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -884,6 +884,94 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
>         return *(unsigned int *)p;
>  }
>
> +#ifdef CONFIG_SLAB_OBJ_EXT
> +
> +/*
> + * Check if memory cgroup or memory allocation profiling is enabled.
> + * If enabled, SLUB tries to reduce memory overhead of accounting
> + * slab objects. If neither is enabled when this function is called,
> + * the optimization is simply skipped to avoid affecting caches that do not
> + * need slabobj_ext metadata.
> + *
> + * However, this may skip the optimization for slabs that are created
> + * early in boot, before those subsystems are initialized, even though
> + * memory cgroups or memory allocation profiling may be used later.
> + */
> +static inline bool need_slab_obj_exts(struct kmem_cache *s)
> +{
> +       if (!mem_cgroup_disabled() && (s->flags & SLAB_ACCOUNT))
> +               return true;
> +
> +       if (mem_alloc_profiling_enabled())
> +               return true;
> +
> +       return false;
> +}
> +
> +static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
> +{
> +       return sizeof(struct slabobj_ext) * slab->objects;
> +}
> +
> +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> +                                                   struct slab *slab)
> +{
> +       unsigned long objext_offset;
> +
> +       objext_offset = s->red_left_pad + s->size * slab->objects;
> +       objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
> +       return objext_offset;
> +}
> +
> +static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
> +                                                    struct slab *slab)
> +{
> +       unsigned long objext_offset = obj_exts_offset_in_slab(s, slab);
> +       unsigned long objext_size = obj_exts_size_in_slab(slab);
> +
> +       return objext_offset + objext_size <= slab_size(slab);
> +}
> +
> +static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
> +{
> +       unsigned long obj_exts;
> +
> +       if (!obj_exts_fit_within_slab_leftover(s, slab))
> +               return false;
> +
> +       obj_exts = (unsigned long)slab_address(slab);
> +       obj_exts += obj_exts_offset_in_slab(s, slab);
> +       return obj_exts == slab_obj_exts(slab);

You can check that slab_obj_exts(slab) is not NULL before making the
above calculations.
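
Something like this, just to illustrate the ordering (untested sketch,
assuming slab_obj_exts() hands back the masked pointer):

	static inline bool obj_exts_in_slab(struct kmem_cache *s,
					    struct slab *slab)
	{
		unsigned long obj_exts = (unsigned long)slab_obj_exts(slab);

		/* No array at all, so skip the offset math entirely. */
		if (!obj_exts)
			return false;

		if (!obj_exts_fit_within_slab_leftover(s, slab))
			return false;

		return obj_exts == (unsigned long)slab_address(slab) +
				   obj_exts_offset_in_slab(s, slab);
	}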

> +}
> +#else
> +static inline bool need_slab_obj_exts(struct kmem_cache *s)
> +{
> +       return false;
> +}
> +
> +static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
> +{
> +       return 0;
> +}
> +
> +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> +                                                   struct slab *slab)
> +{
> +       return 0;
> +}
> +
> +static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
> +                                                    struct slab *slab)
> +{
> +       return false;
> +}
> +
> +static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
> +{
> +       return false;
> +}
> +#endif
> +
>  #ifdef CONFIG_SLUB_DEBUG
>
>  /*
> @@ -1404,7 +1492,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
>         start = slab_address(slab);
>         length = slab_size(slab);
>         end = start + length;
> -       remainder = length % s->size;
> +
> +       if (obj_exts_in_slab(s, slab)) {
> +               remainder = length;
> +               remainder -= obj_exts_offset_in_slab(s, slab);
> +               remainder -= obj_exts_size_in_slab(slab);
> +       } else {
> +               remainder = length % s->size;
> +       }
> +
>         if (!remainder)
>                 return;
>
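
(With the ext4_inode_cache numbers above and no red zone, this becomes
remainder = 32768 - 32032 - 224 = 512 bytes of real padding after the
array; the old "length % s->size" would give 736, and slab_pad_check()
would then flag the live array as corrupted padding.)
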
> @@ -2154,6 +2250,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
>         if (!obj_exts)
>                 return;
>
> +       if (obj_exts_in_slab(slab->slab_cache, slab)) {
> +               slab->obj_exts = 0;
> +               return;
> +       }
> +
>         /*
>          * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
>          * corresponding extension will be NULL. alloc_tag_sub() will throw a
> @@ -2169,6 +2270,31 @@ static inline void free_slab_obj_exts(struct slab *slab)
>         slab->obj_exts = 0;
>  }
>
> +/*
> + * Try to allocate slabobj_ext array from unused space.
> + * This function must be called on a freshly allocated slab to prevent
> + * concurrency problems.
> + */
> +static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
> +{
> +       void *addr;
> +
> +       if (!need_slab_obj_exts(s))
> +               return;
> +
> +       metadata_access_enable();
> +       if (obj_exts_fit_within_slab_leftover(s, slab)) {
> +               addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
> +               addr = kasan_reset_tag(addr);
> +               memset(addr, 0, obj_exts_size_in_slab(slab));
> +               slab->obj_exts = (unsigned long)addr;
> +               if (IS_ENABLED(CONFIG_MEMCG))
> +                       slab->obj_exts |= MEMCG_DATA_OBJEXTS;
> +               slab_set_stride(slab, sizeof(struct slabobj_ext));
> +       }
> +       metadata_access_disable();
> +}
> +
>  #else /* CONFIG_SLAB_OBJ_EXT */
>
>  static inline void init_slab_obj_exts(struct slab *slab)
> @@ -2185,6 +2311,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
>  {
>  }
>
> +static inline void alloc_slab_obj_exts_early(struct kmem_cache *s,
> +                                                      struct slab *slab)
> +{
> +}
> +
>  #endif /* CONFIG_SLAB_OBJ_EXT */
>
>  #ifdef CONFIG_MEM_ALLOC_PROFILING
> @@ -3155,7 +3286,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
>  static __always_inline void account_slab(struct slab *slab, int order,
>                                          struct kmem_cache *s, gfp_t gfp)
>  {
> -       if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
> +       if (memcg_kmem_online() &&
> +                       (s->flags & SLAB_ACCOUNT) &&
> +                       !slab_obj_exts(slab))
>                 alloc_slab_obj_exts(slab, s, gfp, true);

Don't you need to add a check for !obj_exts_in_slab() inside
alloc_slab_obj_exts() to avoid allocating slab->obj_exts?
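
If so, maybe something as simple as this at the top of
alloc_slab_obj_exts() would be enough (untested):

	/* The array already lives in the slab's leftover space. */
	if (obj_exts_in_slab(s, slab))
		return 0;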

>
>         mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
> @@ -3219,9 +3352,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
>         slab->objects = oo_objects(oo);
>         slab->inuse = 0;
>         slab->frozen = 0;
> -       init_slab_obj_exts(slab);
> -
> -       account_slab(slab, oo_order(oo), s, flags);
>
>         slab->slab_cache = s;
>
> @@ -3230,6 +3360,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
>         start = slab_address(slab);
>
>         setup_slab_debug(s, slab, start);
> +       init_slab_obj_exts(slab);
> +       /*
> +        * Poison the slab before initializing the slabobj_ext array
> +        * to prevent the array from being overwritten.
> +        */
> +       alloc_slab_obj_exts_early(s, slab);
> +       account_slab(slab, oo_order(oo), s, flags);

alloc_slab_obj_exts() is called in 2 other places:
1. __memcg_slab_post_alloc_hook()
2. prepare_slab_obj_exts_hook()

Don't you need alloc_slab_obj_exts_early() there as well?

>
>         shuffle = shuffle_freelist(s, slab);
>
> --
> 2.43.0
>
