lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1568727601.5576.160.camel@lca.pw>
Date:   Tue, 17 Sep 2019 09:40:01 -0400
From:   Qian Cai <cai@....pw>
To:     David Rientjes <rientjes@...gle.com>,
        Pengfei Li <lpf.vector@...il.com>
Cc:     cl@...ux.com, penberg@...nel.org,
        Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH] mm/slub: remove left-over debugging code

On Mon, 2019-09-16 at 11:32 -0700, David Rientjes wrote:
> On Mon, 16 Sep 2019, Qian Cai wrote:
> 
> > SLUB_RESILIENCY_TEST and SLUB_DEBUG_CMPXCHG look like some left-over
> > debugging code during the internal development that probably nobody uses
> > it anymore. Remove them to make the world greener.
> 
> Adding Pengfei Li who has been working on a patchset for modified handling 
> of kmalloc cache initialization and touches the resiliency test.
> 
> I still find the resiliency test to be helpful/instructional for handling 
> unexpected conditions in these caches, so I'd suggest against removing it: 
> the only downside is that it's additional source code.  But it's helpful 
> source code for reference.
> 
> The cmpxchg failures could likely be more generalized beyond SLUB since 
> there will be other dependencies in the kernel than just this allocator.

OK, SLUB_RESILIENCY_TEST is fine to keep around and maybe be turned into a
Kconfig option to make it more visible.

Is it fine to remove SLUB_DEBUG_CMPXCHG? If somebody later want to generalize it
beyond SLUB, he/she can always find the old code somewhere anyway.

> 
> (I assume you didn't send a Signed-off-by line because this is an RFC.)
> 
> > ---
> >  mm/slub.c | 110 --------------------------------------------------------------
> >  1 file changed, 110 deletions(-)
> > 
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 8834563cdb4b..f97155ba097d 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -150,12 +150,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
> >   * - Variable sizing of the per node arrays
> >   */
> >  
> > -/* Enable to test recovery from slab corruption on boot */
> > -#undef SLUB_RESILIENCY_TEST
> > -
> > -/* Enable to log cmpxchg failures */
> > -#undef SLUB_DEBUG_CMPXCHG
> > -
> >  /*
> >   * Mininum number of partial slabs. These will be left on the partial
> >   * lists even if they are empty. kmem_cache_shrink may reclaim them.
> > @@ -392,10 +386,6 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
> >  	cpu_relax();
> >  	stat(s, CMPXCHG_DOUBLE_FAIL);
> >  
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > -	pr_info("%s %s: cmpxchg double redo ", n, s->name);
> > -#endif
> > -
> >  	return false;
> >  }
> >  
> > @@ -433,10 +423,6 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
> >  	cpu_relax();
> >  	stat(s, CMPXCHG_DOUBLE_FAIL);
> >  
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > -	pr_info("%s %s: cmpxchg double redo ", n, s->name);
> > -#endif
> > -
> >  	return false;
> >  }
> >  
> > @@ -2004,45 +1990,11 @@ static inline unsigned long next_tid(unsigned long tid)
> >  	return tid + TID_STEP;
> >  }
> >  
> > -static inline unsigned int tid_to_cpu(unsigned long tid)
> > -{
> > -	return tid % TID_STEP;
> > -}
> > -
> > -static inline unsigned long tid_to_event(unsigned long tid)
> > -{
> > -	return tid / TID_STEP;
> > -}
> > -
> >  static inline unsigned int init_tid(int cpu)
> >  {
> >  	return cpu;
> >  }
> >  
> > -static inline void note_cmpxchg_failure(const char *n,
> > -		const struct kmem_cache *s, unsigned long tid)
> > -{
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > -	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
> > -
> > -	pr_info("%s %s: cmpxchg redo ", n, s->name);
> > -
> > -#ifdef CONFIG_PREEMPT
> > -	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
> > -		pr_warn("due to cpu change %d -> %d\n",
> > -			tid_to_cpu(tid), tid_to_cpu(actual_tid));
> > -	else
> > -#endif
> > -	if (tid_to_event(tid) != tid_to_event(actual_tid))
> > -		pr_warn("due to cpu running other code. Event %ld->%ld\n",
> > -			tid_to_event(tid), tid_to_event(actual_tid));
> > -	else
> > -		pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
> > -			actual_tid, tid, next_tid(tid));
> > -#endif
> > -	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
> > -}
> > -
> >  static void init_kmem_cache_cpus(struct kmem_cache *s)
> >  {
> >  	int cpu;
> > @@ -2751,7 +2703,6 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
> >  				object, tid,
> >  				next_object, next_tid(tid)))) {
> >  
> > -			note_cmpxchg_failure("slab_alloc", s, tid);
> >  			goto redo;
> >  		}
> >  		prefetch_freepointer(s, next_object);
> > @@ -4694,66 +4645,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
> >  }
> >  #endif	/* CONFIG_SLUB_DEBUG */
> >  
> > -#ifdef SLUB_RESILIENCY_TEST
> > -static void __init resiliency_test(void)
> > -{
> > -	u8 *p;
> > -	int type = KMALLOC_NORMAL;
> > -
> > -	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
> > -
> > -	pr_err("SLUB resiliency testing\n");
> > -	pr_err("-----------------------\n");
> > -	pr_err("A. Corruption after allocation\n");
> > -
> > -	p = kzalloc(16, GFP_KERNEL);
> > -	p[16] = 0x12;
> > -	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
> > -	       p + 16);
> > -
> > -	validate_slab_cache(kmalloc_caches[type][4]);
> > -
> > -	/* Hmmm... The next two are dangerous */
> > -	p = kzalloc(32, GFP_KERNEL);
> > -	p[32 + sizeof(void *)] = 0x34;
> > -	pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
> > -	       p);
> > -	pr_err("If allocated object is overwritten then not detectable\n\n");
> > -
> > -	validate_slab_cache(kmalloc_caches[type][5]);
> > -	p = kzalloc(64, GFP_KERNEL);
> > -	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
> > -	*p = 0x56;
> > -	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
> > -	       p);
> > -	pr_err("If allocated object is overwritten then not detectable\n\n");
> > -	validate_slab_cache(kmalloc_caches[type][6]);
> > -
> > -	pr_err("\nB. Corruption after free\n");
> > -	p = kzalloc(128, GFP_KERNEL);
> > -	kfree(p);
> > -	*p = 0x78;
> > -	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
> > -	validate_slab_cache(kmalloc_caches[type][7]);
> > -
> > -	p = kzalloc(256, GFP_KERNEL);
> > -	kfree(p);
> > -	p[50] = 0x9a;
> > -	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
> > -	validate_slab_cache(kmalloc_caches[type][8]);
> > -
> > -	p = kzalloc(512, GFP_KERNEL);
> > -	kfree(p);
> > -	p[512] = 0xab;
> > -	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
> > -	validate_slab_cache(kmalloc_caches[type][9]);
> > -}
> > -#else
> > -#ifdef CONFIG_SYSFS
> > -static void resiliency_test(void) {};
> > -#endif
> > -#endif	/* SLUB_RESILIENCY_TEST */
> > -
> >  #ifdef CONFIG_SYSFS
> >  enum slab_stat_type {
> >  	SL_ALL,			/* All slabs */
> > @@ -5875,7 +5766,6 @@ static int __init slab_sysfs_init(void)
> >  	}
> >  
> >  	mutex_unlock(&slab_mutex);
> > -	resiliency_test();
> >  	return 0;
> >  }
> >  
> > -- 
> > 1.8.3.1
> > 
> > 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ