Message-ID: <alpine.DEB.2.00.1105040905280.5495@router.home>
Date: Wed, 4 May 2011 09:21:05 -0500 (CDT)
From: Christoph Lameter <cl@...ux.com>
To: Thomas Gleixner <tglx@...utronix.de>
cc: Tejun Heo <tj@...nel.org>, Pekka Enberg <penberg@...nel.org>,
Ingo Molnar <mingo@...e.hu>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Jens Axboe <axboe@...nel.dk>,
Andrew Morton <akpm@...ux-foundation.org>,
werner <w.landgraf@...ru>, "H. Peter Anvin" <hpa@...or.com>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [block IO crash] Re: 2.6.39-rc5-git2 boot crashs
On Wed, 4 May 2011, Thomas Gleixner wrote:
> So the question is whether CMPXCHG_LOCAL for x86 wants to depend on
> X86_CMPXCHG64.
Right.
> The other solution is to use irqsafe_cpu_cmpxchg_double() instead of
> this_cpu_cmpxchg_double() in slub.c.
>
> This will not hurt the X86_CMPXCHG64=y case, but keep the expansion to
> the above __this_cpu_generic_cmpxchg_double working.
>
> Which makes me even wonder some more whether we need that whole
> CMPXCHG_LOCAL #ifdeffery in slub.c at all.
Hmmm... I have not measured it, but it would certainly be nice to get rid
of it. The period of irq disablement in the fastpath is reduced as well.
However, we need to do additional tid checking (which should be fast).
The following patch does that and runs fine here in kvm.
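
Roughly, the allocation fastpath then always looks like this (simplified
sketch only, with the node check and the slowpath fallback left out; the
real change is in the patch below):

static __always_inline void *alloc_fastpath_sketch(struct kmem_cache *s)
{
	struct kmem_cache_cpu *c;
	unsigned long tid;
	void **object;

redo:
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * Snapshot the transaction id before the freelist. The barrier
	 * keeps the compiler from reordering the two reads. If we get
	 * migrated, or an irq allocates/frees in between, the tid will
	 * have advanced and the cmpxchg below fails.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	if (unlikely(!object))
		return NULL;	/* real code falls back to __slab_alloc() here */

	/*
	 * Replace freelist head and tid in one shot. The irqsafe_ variant
	 * is used so that the generic fallback (arch without a local
	 * cmpxchg_double) disables interrupts around the replacement.
	 */
	if (unlikely(!irqsafe_cpu_cmpxchg_double(
			s->cpu_slab->freelist, s->cpu_slab->tid,
			object, tid,
			get_freepointer(s, object), next_tid(tid))))
		goto redo;

	return object;
}

The tid is unique per cpu and per operation, so a cmpxchg that was set up
on one cpu and completes on another (or races with an irq) can never
succeed by accident; we simply retry.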
---
include/linux/percpu.h | 2 -
include/linux/slub_def.h | 2 -
mm/slub.c | 60 +----------------------------------------------
3 files changed, 3 insertions(+), 61 deletions(-)
Index: linux-2.6/include/linux/percpu.h
===================================================================
--- linux-2.6.orig/include/linux/percpu.h 2011-05-04 09:13:10.000000000 -0500
+++ linux-2.6/include/linux/percpu.h 2011-05-04 09:13:28.000000000 -0500
@@ -948,7 +948,7 @@ do { \
irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
# endif
# define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+ __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
#endif
#endif /* __LINUX_PERCPU_H */
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2011-05-04 09:11:12.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2011-05-04 09:11:17.000000000 -0500
@@ -37,9 +37,7 @@ enum stat_item {
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
-#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid; /* Globally unique transaction id */
-#endif
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2011-05-04 09:07:10.000000000 -0500
+++ linux-2.6/mm/slub.c 2011-05-04 09:13:58.000000000 -0500
@@ -1540,7 +1540,6 @@ static void unfreeze_slab(struct kmem_ca
}
}
-#ifdef CONFIG_CMPXCHG_LOCAL
#ifdef CONFIG_PREEMPT
/*
* Calculate the next globally unique transaction for disambiguiation
@@ -1600,17 +1599,12 @@ static inline void note_cmpxchg_failure(
stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
-#endif
-
void init_kmem_cache_cpus(struct kmem_cache *s)
{
-#ifdef CONFIG_CMPXCHG_LOCAL
int cpu;
for_each_possible_cpu(cpu)
per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
-#endif
-
}
/*
* Remove the cpu slab
@@ -1643,9 +1637,7 @@ static void deactivate_slab(struct kmem_
page->inuse--;
}
c->page = NULL;
-#ifdef CONFIG_CMPXCHG_LOCAL
c->tid = next_tid(c->tid);
-#endif
unfreeze_slab(s, page, tail);
}
@@ -1780,7 +1772,6 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
struct page *new;
-#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long flags;
local_irq_save(flags);
@@ -1792,7 +1783,6 @@ static void *__slab_alloc(struct kmem_ca
*/
c = this_cpu_ptr(s->cpu_slab);
#endif
-#endif
/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1819,10 +1809,8 @@ load_freelist:
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
-#ifdef CONFIG_CMPXCHG_LOCAL
c->tid = next_tid(c->tid);
local_irq_restore(flags);
-#endif
stat(s, ALLOC_SLOWPATH);
return object;
@@ -1858,9 +1846,7 @@ new_slab:
}
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
-#ifdef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
-#endif
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1887,20 +1873,12 @@ static __always_inline void *slab_alloc(
{
void **object;
struct kmem_cache_cpu *c;
-#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid;
-#else
- unsigned long flags;
-#endif
if (slab_pre_alloc_hook(s, gfpflags))
return NULL;
-#ifndef CONFIG_CMPXCHG_LOCAL
- local_irq_save(flags);
-#else
redo:
-#endif
/*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is
@@ -1910,7 +1888,6 @@ redo:
*/
c = __this_cpu_ptr(s->cpu_slab);
-#ifdef CONFIG_CMPXCHG_LOCAL
/*
* The transaction ids are globally unique per cpu and per operation on
* a per cpu queue. Thus they can be guarantee that the cmpxchg_double
@@ -1919,7 +1896,6 @@ redo:
*/
tid = c->tid;
barrier();
-#endif
object = c->freelist;
if (unlikely(!object || !node_match(c, node)))
@@ -1927,7 +1903,6 @@ redo:
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
-#ifdef CONFIG_CMPXCHG_LOCAL
/*
* The cmpxchg will only match if there was no additional
* operation and if we are on the right processor.
@@ -1940,7 +1915,7 @@ redo:
* Since this is without lock semantics the protection is only against
* code executing on this cpu *not* from access by other cpus.
*/
- if (unlikely(!this_cpu_cmpxchg_double(
+ if (unlikely(!irqsafe_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
get_freepointer(s, object), next_tid(tid)))) {
@@ -1948,16 +1923,9 @@ redo:
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
-#else
- c->freelist = get_freepointer(s, object);
-#endif
stat(s, ALLOC_FASTPATH);
}
-#ifndef CONFIG_CMPXCHG_LOCAL
- local_irq_restore(flags);
-#endif
-
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->objsize);
@@ -2034,11 +2002,9 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
-#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long flags;
local_irq_save(flags);
-#endif
slab_lock(page);
stat(s, FREE_SLOWPATH);
@@ -2070,9 +2036,7 @@ checks_ok:
out_unlock:
slab_unlock(page);
-#ifdef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
-#endif
return;
slab_empty:
@@ -2084,9 +2048,7 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
-#ifdef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
-#endif
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -2113,20 +2075,11 @@ static __always_inline void slab_free(st
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
-#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid;
-#else
- unsigned long flags;
-#endif
slab_free_hook(s, x);
-#ifndef CONFIG_CMPXCHG_LOCAL
- local_irq_save(flags);
-
-#else
redo:
-#endif
/*
* Determine the currently cpus per cpu slab.
@@ -2136,16 +2089,13 @@ redo:
*/
c = __this_cpu_ptr(s->cpu_slab);
-#ifdef CONFIG_CMPXCHG_LOCAL
tid = c->tid;
barrier();
-#endif
if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
set_freepointer(s, object, c->freelist);
-#ifdef CONFIG_CMPXCHG_LOCAL
- if (unlikely(!this_cpu_cmpxchg_double(
+ if (unlikely(!irqsafe_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
object, next_tid(tid)))) {
@@ -2153,16 +2103,10 @@ redo:
note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
-#else
- c->freelist = object;
-#endif
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, x, addr);
-#ifndef CONFIG_CMPXCHG_LOCAL
- local_irq_restore(flags);
-#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
--