Date:   Sun, 18 Dec 2022 19:16:46 +0900
From:   Hyeonggon Yoo <42.hyeyoo@...il.com>
To:     Vlastimil Babka <vbabka@...e.cz>
Cc:     Christoph Lameter <cl@...ux.com>,
        David Rientjes <rientjes@...gle.com>,
        Joonsoo Kim <iamjoonsoo.kim@....com>,
        Pekka Enberg <penberg@...nel.org>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Matthew Wilcox <willy@...radead.org>, patches@...ts.linux.dev,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 10/12] mm, slub: remove percpu slabs with CONFIG_SLUB_TINY

On Mon, Nov 21, 2022 at 06:12:00PM +0100, Vlastimil Babka wrote:
> SLUB gets most of its scalability from percpu slabs. However, for
> CONFIG_SLUB_TINY the goal is minimal memory overhead, not scalability.
> Thus, #ifdef out the whole kmem_cache_cpu percpu structure and
> associated code. In addition to the slab page savings, this reduces
> percpu allocator usage and code size.
> 
> This change builds on recent commit c7323a5ad078 ("mm/slub: restrict
> sysfs validation to debug caches and make it safe"), as caches with
> debugging enabled also avoid percpu slabs, and all allocations and
> frees end up going through the partial list. With a bit more
> refactoring done by the preceding patches, use the same code paths
> with CONFIG_SLUB_TINY.
> 
> Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
> ---
>  include/linux/slub_def.h |   4 ++
>  mm/slub.c                | 102 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 103 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
> index c186f25c8148..79df64eb054e 100644
> --- a/include/linux/slub_def.h
> +++ b/include/linux/slub_def.h
> @@ -41,6 +41,7 @@ enum stat_item {
>  	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
>  	NR_SLUB_STAT_ITEMS };
>  
> +#ifndef CONFIG_SLUB_TINY
>  /*
>   * When changing the layout, make sure freelist and tid are still compatible
>   * with this_cpu_cmpxchg_double() alignment requirements.
> @@ -57,6 +58,7 @@ struct kmem_cache_cpu {
>  	unsigned stat[NR_SLUB_STAT_ITEMS];
>  #endif
>  };
> +#endif /* CONFIG_SLUB_TINY */
>  
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
>  #define slub_percpu_partial(c)		((c)->partial)
> @@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
>   * Slab cache management.
>   */
>  struct kmem_cache {
> +#ifndef CONFIG_SLUB_TINY
>  	struct kmem_cache_cpu __percpu *cpu_slab;
> +#endif
>  	/* Used for retrieving partial slabs, etc. */
>  	slab_flags_t flags;
>  	unsigned long min_partial;
> diff --git a/mm/slub.c b/mm/slub.c
> index 5677db3f6d15..7f1cd702c3b4 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -337,10 +337,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
>   */
>  static nodemask_t slab_nodes;
>  
> +#ifndef CONFIG_SLUB_TINY
>  /*
>   * Workqueue used for flush_cpu_slab().
>   */
>  static struct workqueue_struct *flushwq;
> +#endif
>  
>  /********************************************************************
>   * 			Core slab cache functions
> @@ -386,10 +388,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
>  	return freelist_dereference(s, object + s->offset);
>  }
>  
> +#ifndef CONFIG_SLUB_TINY
>  static void prefetch_freepointer(const struct kmem_cache *s, void *object)
>  {
>  	prefetchw(object + s->offset);
>  }
> +#endif
>  
>  /*
>   * When running under KMSAN, get_freepointer_safe() may return an uninitialized
> @@ -1681,11 +1685,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
>  static inline void dec_slabs_node(struct kmem_cache *s, int node,
>  							int objects) {}
>  
> +#ifndef CONFIG_SLUB_TINY
>  static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
>  			       void **freelist, void *nextfree)
>  {
>  	return false;
>  }
> +#endif
>  #endif /* CONFIG_SLUB_DEBUG */
>  
>  /*
> @@ -2219,7 +2225,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
>  		if (!pfmemalloc_match(slab, pc->flags))
>  			continue;
>  
> -		if (kmem_cache_debug(s)) {
> +		if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
>  			object = alloc_single_from_partial(s, n, slab,
>  							pc->orig_size);
>  			if (object)
> @@ -2334,6 +2340,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
>  	return get_any_partial(s, pc);
>  }
>  
> +#ifndef CONFIG_SLUB_TINY
> +
>  #ifdef CONFIG_PREEMPTION
>  /*
>   * Calculate the next globally unique transaction for disambiguation
> @@ -2347,7 +2355,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
>   * different cpus.
>   */
>  #define TID_STEP 1
> -#endif
> +#endif /* CONFIG_PREEMPTION */
>  
>  static inline unsigned long next_tid(unsigned long tid)
>  {
> @@ -2808,6 +2816,13 @@ static int slub_cpu_dead(unsigned int cpu)
>  	return 0;
>  }
>  
> +#else /* CONFIG_SLUB_TINY */
> +static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
> +static inline void flush_all(struct kmem_cache *s) { }
> +static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
> +static inline int slub_cpu_dead(unsigned int cpu) { return 0; }
> +#endif /* CONFIG_SLUB_TINY */
> +
>  /*
>   * Check if the objects in a per cpu structure fit numa
>   * locality expectations.
> @@ -2955,6 +2970,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
>  	return true;
>  }
>  
> +#ifndef CONFIG_SLUB_TINY
>  /*
>   * Check the slab->freelist and either transfer the freelist to the
>   * per cpu freelist or deactivate the slab.
> @@ -3320,6 +3336,33 @@ static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
>  
>  	return object;
>  }
> +#else /* CONFIG_SLUB_TINY */
> +static void *__slab_alloc_node(struct kmem_cache *s,
> +		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
> +{
> +	struct partial_context pc;
> +	struct slab *slab;
> +	void *object;
> +
> +	pc.flags = gfpflags;
> +	pc.slab = &slab;
> +	pc.orig_size = orig_size;
> +	object = get_partial(s, node, &pc);
> +
> +	if (object)
> +		return object;
> +
> +	slab = new_slab(s, gfpflags, node);
> +	if (unlikely(!slab)) {
> +		slab_out_of_memory(s, gfpflags, node);
> +		return NULL;
> +	}
> +
> +	object = alloc_single_from_new_slab(s, slab, orig_size);
> +
> +	return object;
> +}
> +#endif /* CONFIG_SLUB_TINY */
>  
>  /*
>   * If the object has been wiped upon free, make sure it's fully initialized by
> @@ -3503,7 +3546,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  	if (kfence_free(head))
>  		return;
>  
> -	if (kmem_cache_debug(s)) {
> +	if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
>  		free_to_partial_list(s, slab, head, tail, cnt, addr);
>  		return;
>  	}
> @@ -3604,6 +3647,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  	discard_slab(s, slab);
>  }
>  
> +#ifndef CONFIG_SLUB_TINY
>  /*
>   * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
>   * can perform fastpath freeing without additional function calls.
> @@ -3678,6 +3722,16 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
>  	}
>  	stat(s, FREE_FASTPATH);
>  }
> +#else /* CONFIG_SLUB_TINY */
> +static void do_slab_free(struct kmem_cache *s,
> +				struct slab *slab, void *head, void *tail,
> +				int cnt, unsigned long addr)
> +{
> +	void *tail_obj = tail ? : head;
> +
> +	__slab_free(s, slab, head, tail_obj, cnt, addr);
> +}
> +#endif /* CONFIG_SLUB_TINY */
>  
>  static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
>  				      void *head, void *tail, void **p, int cnt,
> @@ -3812,6 +3866,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
>  }
>  EXPORT_SYMBOL(kmem_cache_free_bulk);
>  
> +#ifndef CONFIG_SLUB_TINY
>  static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
>  			size_t size, void **p, struct obj_cgroup *objcg)
>  {
> @@ -3880,6 +3935,36 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
>  	return 0;
>  
>  }
> +#else /* CONFIG_SLUB_TINY */
> +static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
> +			size_t size, void **p, struct obj_cgroup *objcg)
> +{
> +	int i;
> +
> +	for (i = 0; i < size; i++) {
> +		void *object = kfence_alloc(s, s->object_size, flags);
> +
> +		if (unlikely(object)) {
> +			p[i] = object;
> +			continue;
> +		}
> +
> +		p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
> +					 _RET_IP_, s->object_size);
> +		if (unlikely(!p[i]))
> +			goto error;
> +
> +		maybe_wipe_obj_freeptr(s, p[i]);
> +	}
> +
> +	return i;
> +
> +error:
> +	slab_post_alloc_hook(s, objcg, flags, i, p, false);
> +	kmem_cache_free_bulk(s, i, p);
> +	return 0;
> +}
> +#endif /* CONFIG_SLUB_TINY */
>  
>  /* Note that interrupts must be enabled when calling this function. */
>  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
> @@ -4059,6 +4144,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
>  #endif
>  }
>  
> +#ifndef CONFIG_SLUB_TINY
>  static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
>  {
>  	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
> @@ -4078,6 +4164,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
>  
>  	return 1;
>  }
> +#else
> +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
> +{
> +	return 1;
> +}
> +#endif /* CONFIG_SLUB_TINY */
>  
>  static struct kmem_cache *kmem_cache_node;
>  
> @@ -4140,7 +4232,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
>  void __kmem_cache_release(struct kmem_cache *s)
>  {
>  	cache_random_seq_destroy(s);
> +#ifndef CONFIG_SLUB_TINY
>  	free_percpu(s->cpu_slab);
> +#endif
>  	free_kmem_cache_nodes(s);
>  }
>  
> @@ -4917,8 +5011,10 @@ void __init kmem_cache_init(void)
>  
>  void __init kmem_cache_init_late(void)
>  {
> +#ifndef CONFIG_SLUB_TINY
>  	flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
>  	WARN_ON(!flushwq);
> +#endif
>  }
>  
>  struct kmem_cache *
> -- 
> 2.38.1
> 
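
For readers less familiar with the pattern, here is a tiny stand-alone
sketch (plain userspace C, not kernel code; MY_TINY and the function
names are made up for illustration) of the two compile-time techniques
the patch leans on: #ifndef'ing out the fast path and its data entirely,
and an IS_ENABLED()-style 0/1 constant so an ordinary `if` can route
everything to the partial-list path while the compiler discards the
dead branch in the non-tiny build:

/*
 * Illustrative userspace sketch only -- not kernel code. MY_TINY stands
 * in for CONFIG_SLUB_TINY; the function names are invented.
 */
#include <stdio.h>

/*
 * 1) #ifndef stubs: the fast-path code disappears completely when the
 *    tiny option is set, as the patch does for kmem_cache_cpu,
 *    flush_all(), prefetch_freepointer(), and friends.
 */
#ifndef MY_TINY
static void flush_percpu_state(void) { puts("flushing per-CPU slabs"); }
#else
static inline void flush_percpu_state(void) { /* nothing to flush */ }
#endif

/*
 * 2) IS_ENABLED()-style constant: a 0/1 compile-time value lets a plain
 *    `if` select the list-based path, mirroring the patch's
 *    `if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s))` checks.
 */
#ifdef MY_TINY
#define MY_TINY_ENABLED 1
#else
#define MY_TINY_ENABLED 0
#endif

static void free_via_partial_list(void) { puts("free to partial list"); }
static void free_via_fastpath(void)     { puts("per-CPU fastpath free"); }

int main(void)
{
	flush_percpu_state();

	if (MY_TINY_ENABLED)	/* constant-folded at compile time */
		free_via_partial_list();
	else
		free_via_fastpath();

	return 0;
}

Since MY_TINY_ENABLED is a compile-time constant, the compiler drops the
untaken branch, which is why replacing the kmem_cache_debug(s) checks
with IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s) adds no runtime
cost to the regular SLUB build.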

For the record:

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@...il.com>

-- 
Thanks,
Hyeonggon
