Message-ID: <20250424080755.272925-4-harry.yoo@oracle.com>
Date: Thu, 24 Apr 2025 17:07:51 +0900
From: Harry Yoo <harry.yoo@...cle.com>
To: Vlastimil Babka <vbabka@...e.cz>, Christoph Lameter <cl@...two.org>,
        David Rientjes <rientjes@...gle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Dennis Zhou <dennis@...nel.org>, Tejun Heo <tj@...nel.org>,
        Mateusz Guzik <mjguzik@...il.com>
Cc: Jamal Hadi Salim <jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>,
        Jiri Pirko <jiri@...nulli.us>, Vlad Buslov <vladbu@...dia.com>,
        Yevgeny Kliteynik <kliteyn@...dia.com>, Jan Kara <jack@...e.cz>,
        Byungchul Park <byungchul@...com>, linux-mm@...ck.org,
        netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
        Harry Yoo <harry.yoo@...cle.com>
Subject: [RFC PATCH 3/7] mm/slab: revive the destructor feature in slab allocator

Commit c59def9f222d ("Slab allocators: Drop support for destructors")
removed support for destructors, based on the belief that the feature had
few users and its usefulness was questionable.

Mateusz Guzik suggests [1] that using a constructor-destructor pair
can be beneficial when object initialization and de-initialization
require global serialization. For example, mm_struct allocates per-CPU
variables that rely on a global lock for serialization.

With the constructor-destructor pair, the serialization occurs only when
a slab is allocated or freed, rather than each time an individual object
is allocated or freed.
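
To illustrate the pattern, here is a minimal sketch (struct foo,
foo_ctor() and foo_dtor() are hypothetical; the int-returning ctor
matches the signature used in this series):

	struct foo {
		int __percpu *counters;	/* expensive, serialized setup */
	};

	/* Called once per object when a new slab page is populated. */
	static int foo_ctor(void *object)
	{
		struct foo *f = object;

		/* alloc_percpu() serializes on the global pcpu_alloc_mutex. */
		f->counters = alloc_percpu(int);
		return f->counters ? 0 : -ENOMEM;
	}

	/* Called once per object before the slab page is freed; must not fail. */
	static void foo_dtor(void *object)
	{
		struct foo *f = object;

		free_percpu(f->counters);
	}

Objects handed out by kmem_cache_alloc() and returned with
kmem_cache_free() keep their constructed state, so the global lock is
taken only when slab pages are created or torn down.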

Introduce the destructor feature. A user can enable it by setting the
'dtor' field in struct kmem_cache_args. When a cache has a destructor,
cache merging is disabled and the destructor is called for each object
before its slab is freed. In the case of SLAB_TYPESAFE_BY_RCU, it is
invoked after the RCU grace period. Unlike the constructor, the
destructor has no way to fail, and it must not.
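
A usage sketch with the struct kmem_cache_args form of
kmem_cache_create() (names hypothetical):

	struct kmem_cache_args args = {
		.ctor = foo_ctor,
		.dtor = foo_dtor,
	};

	foo_cache = kmem_cache_create("foo", sizeof(struct foo),
				      &args, 0);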

init_on_alloc, init_on_free, placing the free pointer within the object,
slab merging, __GFP_ZERO, and object poisoning do not work for caches
with a destructor, for the same reasons they do not work for caches with
a constructor.

Meanwhile, refactor __kmem_cache_alias() to remove the check for the
mergeability of the cache. Instead, perform these checks before calling
__kmem_cache_alias().

[1] https://lore.kernel.org/linux-mm/CAGudoHFc+Km-3usiy4Wdm1JkM+YjCgD9A8dDKQ06pZP070f1ig@mail.gmail.com

Suggested-by: Mateusz Guzik <mjguzik@...il.com>
Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
---
 include/linux/slab.h | 10 ++++++++++
 mm/slab.h            |  9 +++++----
 mm/slab_common.c     | 41 +++++++++++++++++++++++++++--------------
 mm/slub.c            | 29 ++++++++++++++++++++++++-----
 4 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1ef6d5384f0b..12a8a6b07050 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -336,6 +336,16 @@ struct kmem_cache_args {
 	 * %NULL means no constructor.
 	 */
 	int (*ctor)(void *);
+	/**
+	 * @dtor: A destructor for the objects.
+	 *
+	 * The destructor is invoked for each object when a slab page is freed.
+	 * In case of &SLAB_TYPESAFE_BY_RCU caches, dtor is called after RCU
+	 * grace period.
+	 *
+	 * %NULL means no destructor.
+	 */
+	void (*dtor)(void *);
 };
 
 struct kmem_cache *__kmem_cache_create_args(const char *name,
diff --git a/mm/slab.h b/mm/slab.h
index 30603907d936..cffe960f2611 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -279,6 +279,7 @@ struct kmem_cache {
 	gfp_t allocflags;		/* gfp flags to use on each alloc */
 	int refcount;			/* Refcount for slab cache destroy */
 	int (*ctor)(void *object);	/* Object constructor */
+	void (*dtor)(void *object);	/* Object destructor */
 	unsigned int inuse;		/* Offset to metadata */
 	unsigned int align;		/* Alignment */
 	unsigned int red_left_pad;	/* Left redzone padding size */
@@ -438,10 +439,10 @@ extern void create_boot_cache(struct kmem_cache *, const char *name,
 
 int slab_unmergeable(struct kmem_cache *s);
 struct kmem_cache *find_mergeable(unsigned size, unsigned align,
-		slab_flags_t flags, const char *name, int (*ctor)(void *));
+		slab_flags_t flags, const char *name);
 struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
-		   slab_flags_t flags, int (*ctor)(void *));
+		   slab_flags_t flags);
 
 slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);
 
@@ -638,7 +639,7 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
 {
 	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
 				&init_on_alloc)) {
-		if (c->ctor)
+		if (c->ctor || c->dtor)
 			return false;
 		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
 			return flags & __GFP_ZERO;
@@ -651,7 +652,7 @@ static inline bool slab_want_init_on_free(struct kmem_cache *c)
 {
 	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
 				&init_on_free))
-		return !(c->ctor ||
+		return !(c->ctor || c->dtor ||
 			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
 	return false;
 }
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 59938e44a8c2..f2f1f7bb7170 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -155,7 +155,7 @@ int slab_unmergeable(struct kmem_cache *s)
 	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
 		return 1;
 
-	if (s->ctor)
+	if (s->ctor || s->dtor)
 		return 1;
 
 #ifdef CONFIG_HARDENED_USERCOPY
@@ -172,22 +172,36 @@ int slab_unmergeable(struct kmem_cache *s)
 	return 0;
 }
 
-struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
-		slab_flags_t flags, const char *name, int (*ctor)(void *))
+/*
+ * Can this cache that's going to be created be merged with others?
+ * We can't use struct kmem_cache here because it has not been created yet.
+ */
+static bool is_mergeable(const char *name, slab_flags_t flags,
+			 struct kmem_cache_args *args)
 {
-	struct kmem_cache *s;
-
 	if (slab_nomerge)
-		return NULL;
-
-	if (ctor)
-		return NULL;
+		return false;
 
 	flags = kmem_cache_flags(flags, name);
-
 	if (flags & SLAB_NEVER_MERGE)
-		return NULL;
+		return false;
 
+	if (args->ctor || args->dtor)
+		return false;
+
+#ifdef CONFIG_HARDENED_USERCOPY
+	if (args->usersize)
+		return false;
+#endif
+	return true;
+}
+
+struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
+		slab_flags_t flags, const char *name)
+{
+	struct kmem_cache *s;
+
+	flags = kmem_cache_flags(flags, name);
 	size = ALIGN(size, sizeof(void *));
 	align = calculate_alignment(flags, align, size);
 	size = ALIGN(size, align);
@@ -321,9 +335,8 @@ struct kmem_cache *__kmem_cache_create_args(const char *name,
 		    object_size - args->usersize < args->useroffset))
 		args->usersize = args->useroffset = 0;
 
-	if (!args->usersize)
-		s = __kmem_cache_alias(name, object_size, args->align, flags,
-				       args->ctor);
+	if (is_mergeable(name, flags, args))
+		s = __kmem_cache_alias(name, object_size, args->align, flags);
 	if (s)
 		goto out_unlock;
 
diff --git a/mm/slub.c b/mm/slub.c
index 10b9c87792b7..b7e158da7ed3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2626,6 +2626,15 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
 	struct folio *folio = slab_folio(slab);
 	int order = folio_order(folio);
 	int pages = 1 << order;
+	void *p;
+
+	if (unlikely(s->dtor)) {
+		p = slab->freelist;
+		while (p != NULL) {
+			s->dtor(p);
+			p = get_freepointer(s, p);
+		}
+	}
 
 	__slab_clear_pfmemalloc(slab);
 	folio->mapping = NULL;
@@ -2717,7 +2726,7 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
 		flags = kmalloc_fix_flags(flags);
 
-	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+	WARN_ON_ONCE((s->ctor || s->dtor) && (flags & __GFP_ZERO));
 
 	return allocate_slab(s,
 		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
@@ -5735,7 +5744,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 	 * then we should never poison the object itself.
 	 */
 	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
-			!s->ctor)
+			!s->ctor && !s->dtor)
 		s->flags |= __OBJECT_POISON;
 	else
 		s->flags &= ~__OBJECT_POISON;
@@ -5757,7 +5766,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 	s->inuse = size;
 
 	if (((flags & SLAB_TYPESAFE_BY_RCU) && !args->use_freeptr_offset) ||
-	    (flags & SLAB_POISON) || s->ctor ||
+	    (flags & SLAB_POISON) || s->ctor || s->dtor ||
 	    ((flags & SLAB_RED_ZONE) &&
 	     (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
 		/*
@@ -6405,11 +6414,11 @@ void __init kmem_cache_init_late(void)
 
 struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
-		   slab_flags_t flags, int (*ctor)(void *))
+		   slab_flags_t flags)
 {
 	struct kmem_cache *s;
 
-	s = find_mergeable(size, align, flags, name, ctor);
+	s = find_mergeable(size, align, flags, name);
 	if (s) {
 		if (sysfs_slab_alias(s, name))
 			pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
@@ -6443,6 +6452,7 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 #endif
 	s->align = args->align;
 	s->ctor = args->ctor;
+	s->dtor = args->dtor;
 #ifdef CONFIG_HARDENED_USERCOPY
 	s->useroffset = args->useroffset;
 	s->usersize = args->usersize;
@@ -7003,6 +7013,14 @@ static ssize_t ctor_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR_RO(ctor);
 
+static ssize_t dtor_show(struct kmem_cache *s, char *buf)
+{
+	if (!s->dtor)
+		return 0;
+	return sysfs_emit(buf, "%pS\n", s->dtor);
+}
+SLAB_ATTR_RO(dtor);
+
 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
 {
 	return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
@@ -7356,6 +7374,7 @@ static struct attribute *slab_attrs[] = {
 	&partial_attr.attr,
 	&cpu_slabs_attr.attr,
 	&ctor_attr.attr,
+	&dtor_attr.attr,
 	&aliases_attr.attr,
 	&align_attr.attr,
 	&hwcache_align_attr.attr,
-- 
2.43.0

