linux-kernel - Re: [PATCH v3 1/3] mm/slub: enable debugging memory wasting of kmalloc

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <442d2b9c-9f07-8954-b90e-b4a9f8b64303@intel.com>
Date:   Wed, 27 Jul 2022 20:59:00 +0800
From:   Feng Tang <feng.tang@...el.com>
To:     Christoph Lameter <cl@...two.de>
CC:     Andrew Morton <akpm@...ux-foundation.org>,
        Vlastimil Babka <vbabka@...e.cz>,
        Pekka Enberg <penberg@...nel.org>,
        David Rientjes <rientjes@...gle.com>,
        Joonsoo Kim <iamjoonsoo.kim@....com>,
        Roman Gushchin <roman.gushchin@...ux.dev>,
        Hyeonggon Yoo <42.hyeyoo@...il.com>,
        "linux-mm@...ck.org" <linux-mm@...ck.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "Hansen, Dave" <dave.hansen@...el.com>,
        Robin Murphy <robin.murphy@....com>,
        John Garry <john.garry@...wei.com>,
        Kefeng Wang <wangkefeng.wang@...wei.com>
Subject: Re: [PATCH v3 1/3] mm/slub: enable debugging memory wasting of
 kmalloc



On 2022/7/27 18:20, Christoph Lameter wrote:
> On Wed, 27 Jul 2022, Feng Tang wrote:
> 
>> @@ -2905,7 +2950,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
>>    * already disabled (which is the case for bulk allocation).
>>    */
>>   static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>> -			  unsigned long addr, struct kmem_cache_cpu *c)
>> +			  unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
>>   {
>>   	void *freelist;
>>   	struct slab *slab;
>> @@ -3102,7 +3147,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>>    * pointer.
>>    */
>>   static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>> -			  unsigned long addr, struct kmem_cache_cpu *c)
>> +			  unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
>>   {
>>   	void *p;
>>
>> @@ -3115,7 +3160,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
>>   	c = slub_get_cpu_ptr(s->cpu_slab);
>>   #endif
>>
>> -	p = ___slab_alloc(s, gfpflags, node, addr, c);
>> +	p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
>>   #ifdef CONFIG_PREEMPT_COUNT
>>   	slub_put_cpu_ptr(s->cpu_slab);
> 
> This is modifying and making execution of standard slab functions more
> expensive. Could you restrict modifications to the kmalloc subsystem?
> 
> kmem_cache_alloc() and friends are not doing any rounding up to power of
> two  sizes.
> 
> What is happening here is that you pass kmalloc object size info through
> the kmem_cache_alloc functions so that the regular allocation functions
> debug functionality can then save the kmalloc specific object request
> size. This is active even when no debugging options are enabled.

Yes, it is indeed some extra cost, which I don't like either.

> 
> Can you avoid that? Have kmalloc do the object allocation without passing
> through the kmalloc request size and then add the original size info
> to the debug field later after execution continues in the kmalloc functions?


How about the following patch, which adds no new 'orig_size' parameter to the
core functions? (The 2nd and 3rd redzone debug patches that follow may also
need some changes.)

(Our email server has just been changed and my mutt isn't
working correctly, so the formatting may be broken; I have attached the
new patch as well. Sorry for the inconvenience!)

Thanks,
Feng

---
diff --git a/include/linux/slab.h b/include/linux/slab.h

index 0fefdf528e0d..a713b0e5bbcd 100644

--- a/include/linux/slab.h

+++ b/include/linux/slab.h

@@ -29,6 +29,8 @@

  #define SLAB_RED_ZONE		((slab_flags_t __force)0x00000400U)

  /* DEBUG: Poison objects */

  #define SLAB_POISON		((slab_flags_t __force)0x00000800U)

+/* Indicate a kmalloc slab */

+#define SLAB_KMALLOC		((slab_flags_t __force)0x00001000U)

  /* Align objs on cache lines */

  #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)

  /* Use GFP_DMA memory */

diff --git a/mm/slub.c b/mm/slub.c

index 862dbd9af4f5..97c21a37a6a1 100644

--- a/mm/slub.c

+++ b/mm/slub.c

@@ -191,6 +191,12 @@ static inline bool kmem_cache_debug(struct 
kmem_cache *s)

  	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);

  }



+static inline bool slub_debug_orig_size(struct kmem_cache *s)

+{

+	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&

+			(s->flags & SLAB_KMALLOC));

+}

+

  void *fixup_red_left(struct kmem_cache *s, void *p)

  {

  	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))

@@ -816,6 +822,39 @@ static void print_slab_info(const struct slab *slab)

  	       folio_flags(folio, 0));

  }



+static inline unsigned int *get_orig_size_pointer(struct kmem_cache *s,

+						void *object)

+{

+	void *p = kasan_reset_tag(object);

+

+	p += get_info_end(s);

+	p += sizeof(struct track) * 2;

+	return (unsigned int *)p;

+}

+

+static void set_orig_size(struct kmem_cache *s,

+					void *object, unsigned int orig_size)

+{

+	unsigned int *p;

+

+	if (!slub_debug_orig_size(s))

+		return;

+

+	p = get_orig_size_pointer(s, object);

+	*p = orig_size;

+}

+

+static unsigned int get_orig_size(struct kmem_cache *s, void *object)

+{

+	unsigned int *p;

+

+	if (!slub_debug_orig_size(s))

+		return s->object_size;

+

+	p = get_orig_size_pointer(s, object);

+	return *p;

+}

+

  static void slab_bug(struct kmem_cache *s, char *fmt, ...)

  {

  	struct va_format vaf;

@@ -875,6 +914,9 @@ static void print_trailer(struct kmem_cache *s, 
struct slab *slab, u8 *p)

  	if (s->flags & SLAB_STORE_USER)

  		off += 2 * sizeof(struct track);



+	if (slub_debug_orig_size(s))

+		off += sizeof(unsigned int);

+

  	off += kasan_metadata_size(s);



  	if (off != size_from_object(s))

@@ -1026,10 +1068,14 @@ static int check_pad_bytes(struct kmem_cache *s, 
struct slab *slab, u8 *p)

  {

  	unsigned long off = get_info_end(s);	/* The end of info */



-	if (s->flags & SLAB_STORE_USER)

+	if (s->flags & SLAB_STORE_USER) {

  		/* We also have user information there */

  		off += 2 * sizeof(struct track);



+		if (s->flags & SLAB_KMALLOC)

+			off += sizeof(unsigned int);

+	}

+

  	off += kasan_metadata_size(s);



  	if (size_from_object(s) == off)

@@ -1335,6 +1381,7 @@ static noinline int alloc_debug_processing(struct 
kmem_cache *s,

  	/* Success perform special debug activities for allocs */

  	if (s->flags & SLAB_STORE_USER)

  		set_track(s, object, TRACK_ALLOC, addr);

+

  	trace(s, slab, object, 1);

  	init_object(s, object, SLUB_RED_ACTIVE);

  	return 1;

@@ -3240,6 +3287,9 @@ static __always_inline void 
*slab_alloc_node(struct kmem_cache *s, struct list_l

  	init = slab_want_init_on_alloc(gfpflags, s);



  out:

+#ifdef CONFIG_SLUB_DEBUG

+	set_orig_size(s, object, orig_size);

+#endif

  	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);



  	return object;

@@ -4112,12 +4162,17 @@ static int calculate_sizes(struct kmem_cache *s)

  	}



  #ifdef CONFIG_SLUB_DEBUG

-	if (flags & SLAB_STORE_USER)

+	if (flags & SLAB_STORE_USER) {

  		/*

  		 * Need to store information about allocs and frees after

  		 * the object.

  		 */

  		size += 2 * sizeof(struct track);

+

+		/* Save the original kmalloc request size */

+		if (flags & SLAB_KMALLOC)

+			size += sizeof(unsigned int);

+	}

  #endif



  	kasan_cache_create(s, &size, &s->flags);

@@ -4842,7 +4897,7 @@ void __init kmem_cache_init(void)



  	/* Now we can use the kmem_cache to allocate kmalloc slabs */

  	setup_kmalloc_cache_index_table();

-	create_kmalloc_caches(0);

+	create_kmalloc_caches(SLAB_KMALLOC);



  	/* Setup random freelists for each cache */

  	init_freelist_randomization();

@@ -5068,6 +5123,7 @@ struct location {

  	depot_stack_handle_t handle;

  	unsigned long count;

  	unsigned long addr;

+	unsigned long waste;

  	long long sum_time;

  	long min_time;

  	long max_time;

@@ -5114,13 +5170,15 @@ static int alloc_loc_track(struct loc_track *t, 
unsigned long max, gfp_t flags)

  }



  static int add_location(struct loc_track *t, struct kmem_cache *s,

-				const struct track *track)

+				const struct track *track,

+				unsigned int orig_size)

  {

  	long start, end, pos;

  	struct location *l;

-	unsigned long caddr, chandle;

+	unsigned long caddr, chandle, cwaste;

  	unsigned long age = jiffies - track->when;

  	depot_stack_handle_t handle = 0;

+	unsigned int waste = s->object_size - orig_size;



  #ifdef CONFIG_STACKDEPOT

  	handle = READ_ONCE(track->handle);

@@ -5138,11 +5196,13 @@ static int add_location(struct loc_track *t, 
struct kmem_cache *s,

  		if (pos == end)

  			break;



-		caddr = t->loc[pos].addr;

-		chandle = t->loc[pos].handle;

-		if ((track->addr == caddr) && (handle == chandle)) {

+		l = &t->loc[pos];

+		caddr = l->addr;

+		chandle = l->handle;

+		cwaste = l->waste;

+		if ((track->addr == caddr) && (handle == chandle) &&

+			(waste == cwaste)) {



-			l = &t->loc[pos];

  			l->count++;

  			if (track->when) {

  				l->sum_time += age;

@@ -5167,6 +5227,9 @@ static int add_location(struct loc_track *t, 
struct kmem_cache *s,

  			end = pos;

  		else if (track->addr == caddr && handle < chandle)

  			end = pos;

+		else if (track->addr == caddr && handle == chandle &&

+				waste < cwaste)

+			end = pos;

  		else

  			start = pos;

  	}

@@ -5190,6 +5253,7 @@ static int add_location(struct loc_track *t, 
struct kmem_cache *s,

  	l->min_pid = track->pid;

  	l->max_pid = track->pid;

  	l->handle = handle;

+	l->waste = waste;

  	cpumask_clear(to_cpumask(l->cpus));

  	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));

  	nodes_clear(l->nodes);

@@ -5208,7 +5272,7 @@ static void process_slab(struct loc_track *t, 
struct kmem_cache *s,



  	for_each_object(p, s, addr, slab->objects)

  		if (!test_bit(__obj_to_index(s, addr, p), obj_map))

-			add_location(t, s, get_track(s, p, alloc));

+			add_location(t, s, get_track(s, p, alloc), get_orig_size(s, p));

  }

  #endif  /* CONFIG_DEBUG_FS   */

  #endif	/* CONFIG_SLUB_DEBUG */

@@ -6078,6 +6142,10 @@ static int slab_debugfs_show(struct seq_file 
*seq, void *v)

  		else

  			seq_puts(seq, "<not-available>");



+		if (l->waste)

+			seq_printf(seq, " waste=%lu/%lu",

+				l->count * l->waste, l->waste);

+

  		if (l->sum_time != l->min_time) {

  			seq_printf(seq, " age=%ld/%llu/%ld",

  				l->min_time, div_u64(l->sum_time, l->count),


View attachment "kmalloc_debug.patch" of type "text/x-patch" (6660 bytes)