Message-ID: <alpine.DEB.1.10.0910081520210.8030@gentwo.org>
Date: Thu, 8 Oct 2009 15:21:50 -0400 (EDT)
From: Christoph Lameter <cl@...ux-foundation.org>
To: Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>
cc: Peter Zijlstra <peterz@...radead.org>, akpm@...ux-foundation.org,
linux-kernel@...r.kernel.org,
Pekka Enberg <penberg@...helsinki.fi>,
Tejun Heo <tj@...nel.org>, Mel Gorman <mel@....ul.ie>,
mingo@...e.hu
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o
interrupt disable
On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:
> OK, sounds good,
Then here is the full patch for review (vs. percpu-next):
From: Christoph Lameter <cl@...ux-foundation.org>
Subject: SLUB: Experimental new fastpath w/o interrupt disable
This is a bit of a different tack than the last version provided
by Mathieu.
Instead of using a cmpxchg, we keep a state variable in the per-cpu structure
that is incremented when we enter the hot path. We can then detect that
a thread is already in the fastpath and fall back to an alternate allocation /
free technique that bypasses fastpath caching.
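To illustrate the idea outside the kernel, here is a minimal userspace sketch
(an illustration only, not part of the patch): a thread-local counter stands in
for the per-cpu "active" field, and a direct recursive call stands in for an
interrupt handler re-entering the allocator while the fastpath is in progress.

/*
 * Minimal userspace sketch of the "active" counter idea -- an
 * illustration only, not the patch itself.  A thread-local counter
 * stands in for the per-cpu field, and a recursive call stands in
 * for an interrupt re-entering the allocator mid-fastpath.
 */
#include <stdio.h>

static __thread int active = -1;	/* -1: nobody is in the fastpath */

static void *slow_alloc(void)
{
	/* Bypasses the per-"cpu" cache entirely, so it is always safe. */
	printf("slowpath allocation\n");
	return NULL;
}

static void *fast_alloc(int simulate_nesting)
{
	void *object;

	active++;				/* enter the fastpath */
	if (active != 0) {
		/* Someone is already in the fastpath: fall back. */
		object = slow_alloc();
	} else {
		if (simulate_nesting)
			fast_alloc(0);		/* "interrupt" re-enters here */
		printf("fastpath allocation\n");
		object = NULL;			/* would pop the per-cpu freelist */
	}
	active--;				/* leave the fastpath */
	return object;
}

int main(void)
{
	fast_alloc(0);	/* plain call: takes the fastpath */
	fast_alloc(1);	/* nested call sees active != 0, takes the slowpath */
	return 0;
}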
V1->V2:
- Use safe preempt_enable / disable.
- Enable preempt before calling into the page allocator.
- Check for hotpath activity changes when returning from the page allocator.
- Add barriers.
Todo:
- Verify that this is really safe
- Is this a benefit?
Cc: Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>
Cc: Pekka Enberg <penberg@...helsinki.fi>
Signed-off-by: Christoph Lameter <cl@...ux-foundation.org>
---
include/linux/slub_def.h | 1
mm/slub.c | 112 +++++++++++++++++++++++++++++++++++++++--------
2 files changed, 96 insertions(+), 17 deletions(-)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
+ int active; /* Active fastpaths */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2009-10-08 11:35:59.000000000 -0500
+++ linux-2.6/mm/slub.c 2009-10-08 14:03:22.000000000 -0500
@@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
unsigned long addr)
{
void **object;
- struct page *page = __this_cpu_read(s->cpu_slab->page);
+ struct page *page;
+ unsigned long flags;
+ int hotpath;
+
+ local_irq_save(flags);
+ hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
+ __this_cpu_dec(s->cpu_slab->active); /* Drop count from hotpath */
+ page = __this_cpu_read(s->cpu_slab->page);
/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1627,12 +1634,24 @@ load_freelist:
if (unlikely(SLABDEBUG && PageSlubDebug(page)))
goto debug;
- __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
- __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
- page->inuse = page->objects;
- page->freelist = NULL;
+ if (unlikely(hotpath)) {
+ /* Object on page free list available and hotpath busy */
+ page->inuse++;
+ page->freelist = get_freepointer(s, object);
+
+ } else {
+
+ /* Prepare new list of objects for hotpath */
+ __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+ page->inuse = page->objects;
+ page->freelist = NULL;
+ __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+
+ }
+
unlock_out:
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, ALLOC_SLOWPATH);
return object;
@@ -1642,21 +1661,38 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- __this_cpu_write(s->cpu_slab->page, page);
stat(s, ALLOC_FROM_PARTIAL);
+
+ if (hotpath)
+ goto hot_lock;
+
+ __this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
if (gfpflags & __GFP_WAIT)
local_irq_enable();
+ preempt_enable();
page = new_slab(s, gfpflags, node);
+ preempt_disable();
+
+ /*
+ * We have already decremented our count. Someone else
+ * could be running right now or we were moved to a
+ * processor that is in the hotpath. So check against -1.
+ */
+ hotpath = __this_cpu_read(s->cpu_slab->active) != -1;
if (gfpflags & __GFP_WAIT)
local_irq_disable();
if (page) {
stat(s, ALLOC_SLAB);
+
+ if (hotpath)
+ goto hot_no_lock;
+
if (__this_cpu_read(s->cpu_slab->page))
flush_slab(s, __this_cpu_ptr(s->cpu_slab));
slab_lock(page);
@@ -1664,9 +1700,13 @@ new_slab:
__this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
+
+ local_irq_restore(flags);
+
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
return NULL;
+
debug:
if (!alloc_debug_processing(s, page, object, addr))
goto another_slab;
@@ -1675,6 +1715,20 @@ debug:
page->freelist = get_freepointer(s, object);
__this_cpu_write(s->cpu_slab->node, -1);
goto unlock_out;
+
+ /*
+ * Hotpath is busy and we need to avoid touching
+ * hotpath variables
+ */
+hot_no_lock:
+ slab_lock(page);
+
+hot_lock:
+ __ClearPageSlubFrozen(page);
+ if (get_freepointer(s, page->freelist))
+ /* Cannot put page into the hotpath. Instead back to partial */
+ add_partial(get_node(s, page_to_nid(page)), page, 0);
+ goto load_freelist;
}
/*
@@ -1691,7 +1745,6 @@ static __always_inline void *slab_alloc(
gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
- unsigned long flags;
gfpflags &= gfp_allowed_mask;
@@ -1701,18 +1754,27 @@ static __always_inline void *slab_alloc(
if (should_failslab(s->objsize, gfpflags))
return NULL;
- local_irq_save(flags);
+ preempt_disable();
+
+ irqsafe_cpu_inc(s->cpu_slab->active);
+ barrier();
object = __this_cpu_read(s->cpu_slab->freelist);
- if (unlikely(!object || !node_match(s, node)))
+ if (unlikely(!object || !node_match(s, node) ||
+ __this_cpu_read(s->cpu_slab->active)))
object = __slab_alloc(s, gfpflags, node, addr);
else {
+
__this_cpu_write(s->cpu_slab->freelist,
get_freepointer(s, object));
+ barrier();
+ irqsafe_cpu_dec(s->cpu_slab->active);
stat(s, ALLOC_FASTPATH);
+
}
- local_irq_restore(flags);
+
+ preempt_enable();
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, s->objsize);
@@ -1777,7 +1839,9 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ unsigned long flags;
+ local_irq_save(flags);
stat(s, FREE_SLOWPATH);
slab_lock(page);
@@ -1809,6 +1873,7 @@ checks_ok:
out_unlock:
slab_unlock(page);
+ local_irq_restore(flags);
return;
slab_empty:
@@ -1820,6 +1885,7 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -1845,24 +1911,31 @@ static __always_inline void slab_free(st
struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
- unsigned long flags;
kmemleak_free_recursive(x, s->flags);
- local_irq_save(flags);
+ preempt_disable();
kmemcheck_slab_free(s, object, s->objsize);
debug_check_no_locks_freed(object, s->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(object, s->objsize);
+ irqsafe_cpu_inc(s->cpu_slab->active);
+ barrier();
if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
- __this_cpu_read(s->cpu_slab->node) >= 0)) {
- set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
+ __this_cpu_read(s->cpu_slab->node) >= 0) &&
+ !__this_cpu_read(s->cpu_slab->active)) {
+ set_freepointer(s, object,
+ __this_cpu_read(s->cpu_slab->freelist));
__this_cpu_write(s->cpu_slab->freelist, object);
+ barrier();
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, FREE_FASTPATH);
- } else
+ } else {
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
__slab_free(s, page, x, addr);
-
- local_irq_restore(flags);
+ }
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2064,6 +2137,8 @@ static DEFINE_PER_CPU(struct kmem_cache_
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
{
+ int cpu;
+
if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
/*
* Boot time creation of the kmalloc array. Use static per cpu data
@@ -2073,6 +2148,9 @@ static inline int alloc_kmem_cache_cpus(
else
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
+
if (!s->cpu_slab)
return 0;
--