[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20110324161446.GA32068@elte.hu>
Date: Thu, 24 Mar 2011 17:14:46 +0100
From: Ingo Molnar <mingo@...e.hu>
To: Pekka Enberg <penberg@...nel.org>
Cc: torvalds@...ux-foundation.org, cl@...ux-foundation.org,
akpm@...ux-foundation.org, tj@...nel.org, npiggin@...nel.dk,
rientjes@...gle.com, linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Subject: Re: [GIT PULL] SLAB changes for v2.6.39-rc1
* Ingo Molnar <mingo@...e.hu> wrote:
> Caused by:
>
> | 8a5ec0ba42c4919e2d8f4c3138cc8b987fdb0b79 is the first bad commit
> | commit 8a5ec0ba42c4919e2d8f4c3138cc8b987fdb0b79
> | Author: Christoph Lameter <cl@...ux.com>
> | Date: Fri Feb 25 11:38:54 2011 -0600
> |
> | Lockless (and preemptless) fastpaths for slub
>
> I'll try to revert these:
>
> 2fd66c517d5e: slub: Add missing irq restore for the OOM path
> a24c5a0ea902: slub: Dont define useless label in the !CONFIG_CMPXCHG_LOCAL case
> 8a5ec0ba42c4: Lockless (and preemptless) fastpaths for slub
The combo revert below solves the boot crash.
Thanks,
Ingo
------------------------------->
>From 1b322eee05a96e8395b62e04a3d1f10fb483c259 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@...e.hu>
Date: Thu, 24 Mar 2011 17:03:51 +0100
Subject: [PATCH] Revert various slub patches
Revert "slub: Add missing irq restore for the OOM path"
This reverts commit 2fd66c517d5e98de2528d86e0e62f5069ff99f59.
Revert "slub: Dont define useless label in the !CONFIG_CMPXCHG_LOCAL case"
This reverts commit a24c5a0ea902bcda348f086bd909cc2d6e305bf8.
Revert "slub,rcu: don't assume the size of struct rcu_head"
This reverts commit da9a638c6f8fc0633fa94a334f1c053f5e307177.
Revert "slub: Add statistics for this_cmpxchg_double failures"
This reverts commit 4fdccdfbb4652a7bbac8adbce7449eb093775118.
Revert "Lockless (and preemptless) fastpaths for slub"
This reverts commit 8a5ec0ba42c4919e2d8f4c3138cc8b987fdb0b79.
---
include/linux/slub_def.h | 6 +-
mm/slub.c | 243 ++--------------------------------------------
2 files changed, 9 insertions(+), 240 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 45ca123..74af3d6 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -32,14 +32,10 @@ enum stat_item {
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
ORDER_FALLBACK, /* Number of times fallback was necessary */
- CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu {
- void **freelist; /* Pointer to next available object */
-#ifdef CONFIG_CMPXCHG_LOCAL
- unsigned long tid; /* Globally unique transaction id */
-#endif
+ void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
diff --git a/mm/slub.c b/mm/slub.c
index 93de30d..bd84722 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -217,7 +217,7 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
#endif
-static inline void stat(const struct kmem_cache *s, enum stat_item si)
+static inline void stat(struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
__this_cpu_inc(s->cpu_slab->stat[si]);
@@ -1285,38 +1285,21 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_pages(page, order);
}
-#define need_reserve_slab_rcu \
- (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
-
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page;
- if (need_reserve_slab_rcu)
- page = virt_to_head_page(h);
- else
- page = container_of((struct list_head *)h, struct page, lru);
-
+ page = container_of((struct list_head *)h, struct page, lru);
__free_slab(page->slab, page);
}
static void free_slab(struct kmem_cache *s, struct page *page)
{
if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
- struct rcu_head *head;
-
- if (need_reserve_slab_rcu) {
- int order = compound_order(page);
- int offset = (PAGE_SIZE << order) - s->reserved;
-
- VM_BUG_ON(s->reserved != sizeof(*head));
- head = page_address(page) + offset;
- } else {
- /*
- * RCU free overloads the RCU head over the LRU
- */
- head = (void *)&page->lru;
- }
+ /*
+ * RCU free overloads the RCU head over the LRU
+ */
+ struct rcu_head *head = (void *)&page->lru;
call_rcu(head, rcu_free_slab);
} else
@@ -1540,78 +1523,6 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
}
}
-#ifdef CONFIG_CMPXCHG_LOCAL
-#ifdef CONFIG_PREEMPT
-/*
- * Calculate the next globally unique transaction for disambiguiation
- * during cmpxchg. The transactions start with the cpu number and are then
- * incremented by CONFIG_NR_CPUS.
- */
-#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
-#else
-/*
- * No preemption supported therefore also no need to check for
- * different cpus.
- */
-#define TID_STEP 1
-#endif
-
-static inline unsigned long next_tid(unsigned long tid)
-{
- return tid + TID_STEP;
-}
-
-static inline unsigned int tid_to_cpu(unsigned long tid)
-{
- return tid % TID_STEP;
-}
-
-static inline unsigned long tid_to_event(unsigned long tid)
-{
- return tid / TID_STEP;
-}
-
-static inline unsigned int init_tid(int cpu)
-{
- return cpu;
-}
-
-static inline void note_cmpxchg_failure(const char *n,
- const struct kmem_cache *s, unsigned long tid)
-{
-#ifdef SLUB_DEBUG_CMPXCHG
- unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
-
- printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
-
-#ifdef CONFIG_PREEMPT
- if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
- printk("due to cpu change %d -> %d\n",
- tid_to_cpu(tid), tid_to_cpu(actual_tid));
- else
-#endif
- if (tid_to_event(tid) != tid_to_event(actual_tid))
- printk("due to cpu running other code. Event %ld->%ld\n",
- tid_to_event(tid), tid_to_event(actual_tid));
- else
- printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
- actual_tid, tid, next_tid(tid));
-#endif
- stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
-}
-
-#endif
-
-void init_kmem_cache_cpus(struct kmem_cache *s)
-{
-#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
-#endif
-
-}
/*
* Remove the cpu slab
*/
@@ -1643,9 +1554,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
page->inuse--;
}
c->page = NULL;
-#ifdef CONFIG_CMPXCHG_LOCAL
- c->tid = next_tid(c->tid);
-#endif
unfreeze_slab(s, page, tail);
}
@@ -1780,19 +1688,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
{
void **object;
struct page *new;
-#ifdef CONFIG_CMPXCHG_LOCAL
- unsigned long flags;
-
- local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
- /*
- * We may have been preempted and rescheduled on a different
- * cpu before disabling interrupts. Need to reload cpu area
- * pointer.
- */
- c = this_cpu_ptr(s->cpu_slab);
-#endif
-#endif
/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1819,10 +1714,6 @@ load_freelist:
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
-#ifdef CONFIG_CMPXCHG_LOCAL
- c->tid = next_tid(c->tid);
- local_irq_restore(flags);
-#endif
stat(s, ALLOC_SLOWPATH);
return object;
@@ -1858,9 +1749,6 @@ new_slab:
}
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
-#ifdef CONFIG_CMPXCHG_LOCAL
- local_irq_restore(flags);
-#endif
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1887,76 +1775,23 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
{
void **object;
struct kmem_cache_cpu *c;
-#ifdef CONFIG_CMPXCHG_LOCAL
- unsigned long tid;
-#else
unsigned long flags;
-#endif
if (slab_pre_alloc_hook(s, gfpflags))
return NULL;
-#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_save(flags);
-#else
-redo:
-#endif
-
- /*
- * Must read kmem_cache cpu data via this cpu ptr. Preemption is
- * enabled. We may switch back and forth between cpus while
- * reading from one cpu area. That does not matter as long
- * as we end up on the original cpu again when doing the cmpxchg.
- */
c = __this_cpu_ptr(s->cpu_slab);
-
-#ifdef CONFIG_CMPXCHG_LOCAL
- /*
- * The transaction ids are globally unique per cpu and per operation on
- * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
- * occurs on the right processor and that there was no operation on the
- * linked list in between.
- */
- tid = c->tid;
- barrier();
-#endif
-
object = c->freelist;
if (unlikely(!object || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
-#ifdef CONFIG_CMPXCHG_LOCAL
- /*
- * The cmpxchg will only match if there was no additonal
- * operation and if we are on the right processor.
- *
- * The cmpxchg does the following atomically (without lock semantics!)
- * 1. Relocate first pointer to the current per cpu area.
- * 2. Verify that tid and freelist have not been changed
- * 3. If they were not changed replace tid and freelist
- *
- * Since this is without lock semantics the protection is only against
- * code executing on this cpu *not* from access by other cpus.
- */
- if (unlikely(!this_cpu_cmpxchg_double(
- s->cpu_slab->freelist, s->cpu_slab->tid,
- object, tid,
- get_freepointer(s, object), next_tid(tid)))) {
-
- note_cmpxchg_failure("slab_alloc", s, tid);
- goto redo;
- }
-#else
c->freelist = get_freepointer(s, object);
-#endif
stat(s, ALLOC_FASTPATH);
}
-
-#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
-#endif
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->objsize);
@@ -2034,13 +1869,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
{
void *prior;
void **object = (void *)x;
-#ifdef CONFIG_CMPXCHG_LOCAL
- unsigned long flags;
- local_irq_save(flags);
-#endif
- slab_lock(page);
stat(s, FREE_SLOWPATH);
+ slab_lock(page);
if (kmem_cache_debug(s))
goto debug;
@@ -2070,9 +1901,6 @@ checks_ok:
out_unlock:
slab_unlock(page);
-#ifdef CONFIG_CMPXCHG_LOCAL
- local_irq_restore(flags);
-#endif
return;
slab_empty:
@@ -2084,9 +1912,6 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
-#ifdef CONFIG_CMPXCHG_LOCAL
- local_irq_restore(flags);
-#endif
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -2113,56 +1938,21 @@ static __always_inline void slab_free(struct kmem_cache *s,
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
-#ifdef CONFIG_CMPXCHG_LOCAL
- unsigned long tid;
-#else
unsigned long flags;
-#endif
slab_free_hook(s, x);
-#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_save(flags);
-
-#else
-redo:
-#endif
-
- /*
- * Determine the currently cpus per cpu slab.
- * The cpu may change afterward. However that does not matter since
- * data is retrieved via this pointer. If we are on the same cpu
- * during the cmpxchg then the free will succedd.
- */
c = __this_cpu_ptr(s->cpu_slab);
-#ifdef CONFIG_CMPXCHG_LOCAL
- tid = c->tid;
- barrier();
-#endif
-
if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
set_freepointer(s, object, c->freelist);
-
-#ifdef CONFIG_CMPXCHG_LOCAL
- if (unlikely(!this_cpu_cmpxchg_double(
- s->cpu_slab->freelist, s->cpu_slab->tid,
- c->freelist, tid,
- object, next_tid(tid)))) {
-
- note_cmpxchg_failure("slab_free", s, tid);
- goto redo;
- }
-#else
c->freelist = object;
-#endif
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, x, addr);
-#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
-#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2354,23 +2144,9 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
-#ifdef CONFIG_CMPXCHG_LOCAL
- /*
- * Must align to double word boundary for the double cmpxchg instructions
- * to work.
- */
- s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
-#else
- /* Regular alignment is sufficient */
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
-#endif
-
- if (!s->cpu_slab)
- return 0;
- init_kmem_cache_cpus(s);
-
- return 1;
+ return s->cpu_slab != NULL;
}
static struct kmem_cache *kmem_cache_node;
@@ -2609,9 +2385,6 @@ static int kmem_cache_open(struct kmem_cache *s,
s->flags = kmem_cache_flags(size, flags, name, ctor);
s->reserved = 0;
- if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
- s->reserved = sizeof(struct rcu_head);
-
if (!calculate_sizes(s, -1))
goto error;
if (disable_higher_order_debug) {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists