Message-ID: <Pine.LNX.4.64.0804082340120.11104@sbz-30.cs.Helsinki.FI>
Date: Tue, 8 Apr 2008 23:42:27 +0300 (EEST)
From: Pekka J Enberg <penberg@...helsinki.fi>
To: Linus Torvalds <torvalds@...ux-foundation.org>
cc: Hugh Dickins <hugh@...itas.com>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Christoph Lameter <clameter@....com>,
James Bottomley <James.Bottomley@...senPartnership.com>,
Andrew Morton <akpm@...ux-foundation.org>,
FUJITA Tomonori <fujita.tomonori@....ntt.co.jp>,
Jens Axboe <jens.axboe@...cle.com>,
"Rafael J. Wysocki" <rjw@...k.pl>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] scsi: fix sense_slab/bio swapping livelock
On Tue, 8 Apr 2008, Pekka J Enberg wrote:
> >
> > So something like the following (totally untested) patch modulo the
> > pre-allocation bits.
On Mon, 7 Apr 2008, Linus Torvalds wrote:
> Hmm. I didn't check, but won't this cause problems on the freeing path
> when we call kmem_cache_free() on the result but with the wrong "struct
> kmem_cache" pointer?
So something like this fugly patch on top of the SLUB variable order
patches that let the allocator fall back to smaller page orders. It
survives OOM in my testing and the box seems to be a bit more responsive,
even under X, with this.
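
(To illustrate the intended consumer: anything allocating a short-lived
buffer on the writeback path opts in simply by passing GFP_TEMPORARY. A
minimal, hypothetical caller sketch -- the SCSI sense buffer from this
thread is just one example, and only objects small enough for the largest
reserve cache, 256 bytes here, can benefit:

	/*
	 * Short-lived allocation on the I/O completion path. With the
	 * patch below, GFP_TEMPORARY lets it dip into the emergency
	 * reserves once the page allocator is exhausted.
	 */
	buf = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_TEMPORARY);
	if (!buf)
		return -ENOMEM;
)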
Pekka
---
mm/slub.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 92 insertions(+), 1 deletion(-)
Index: slab-2.6/mm/slub.c
===================================================================
--- slab-2.6.orig/mm/slub.c 2008-04-08 22:52:02.000000000 +0300
+++ slab-2.6/mm/slub.c 2008-04-08 23:27:27.000000000 +0300
@@ -1549,6 +1549,88 @@
 }
 
 /*
+ * Emergency caches are reserved for GFP_TEMPORARY allocations on OOM.
+ */
+
+#define MIN_EMERGENCY_SIZE 32
+#define NR_EMERGENCY_CACHES 4
+
+static struct kmem_cache *emergency_caches[NR_EMERGENCY_CACHES];
+
+static void pre_alloc_cpu_slabs(struct kmem_cache *s)
+{
+	int cpu;
+
+	/*
+	 * FIXME: CPU hot-plug, stats, debug?
+	 */
+	for_each_online_cpu(cpu) {
+		struct kmem_cache_cpu *c;
+		struct page *new;
+		int node;
+
+		c = get_cpu_slab(s, cpu);
+		node = cpu_to_node(cpu);
+
+		new = new_slab(s, GFP_KERNEL, node);
+		BUG_ON(!new);
+
+		slab_lock(new);
+		SetSlabFrozen(new);
+		c->page = new;
+		/* Hand the whole freelist over to the lockless per-cpu list. */
+		c->freelist = c->page->freelist;
+		c->page->inuse = c->page->objects;
+		c->page->freelist = NULL;
+		c->node = page_to_nid(c->page);
+		slab_unlock(c->page);
+	}
+}
+
+static void init_emergency_caches(void)
+{
+	unsigned long size = MIN_EMERGENCY_SIZE;
+	int i;
+
+	for (i = 0; i < NR_EMERGENCY_CACHES; i++) {
+		struct kmem_cache *cache;
+		char *name;
+
+		name = kasprintf(GFP_KERNEL, "emergency-%lu", size);
+		BUG_ON(!name);
+
+		/* The cache keeps the name pointer, so don't kfree() it. */
+		cache = kmem_cache_create(name, size, 0, 0, NULL);
+		BUG_ON(!cache);
+
+		pre_alloc_cpu_slabs(cache);
+		emergency_caches[i] = cache;
+
+		size *= 2;
+	}
+}
+
+static void *emergency_alloc(struct kmem_cache *cache, gfp_t gfp)
+{
+	unsigned long cache_size = MIN_EMERGENCY_SIZE;
+	void *p = NULL;
+	int i;
+
+	/* Find the smallest emergency cache the object fits in. */
+	for (i = 0; i < NR_EMERGENCY_CACHES; i++) {
+		if (cache->objsize <= cache_size) {
+			p = kmem_cache_alloc(emergency_caches[i], gfp);
+			if (p)
+				break;
+		}
+		cache_size *= 2;
+	}
+	if (p && cache->ctor)
+		cache->ctor(cache, p);
+	return p;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
@@ -1626,6 +1708,14 @@
 		c->page = new;
 		goto load_freelist;
 	}
+
+	/*
+	 * We are really OOM. Let short-lived allocations dip into the reserves
+	 * to ensure writeback makes progress.
+	 */
+	if ((gfpflags & GFP_TEMPORARY) == GFP_TEMPORARY)
+		return emergency_alloc(s, gfpflags);
+
 	return NULL;
 debug:
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1791,7 +1881,7 @@
 
 	page = virt_to_head_page(x);
 
-	slab_free(s, page, x, __builtin_return_address(0));
+	slab_free(page->slab, page, x, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -3225,6 +3315,7 @@
 #else
 	kmem_size = sizeof(struct kmem_cache);
 #endif
+	init_emergency_caches();
 
 	printk(KERN_INFO
 		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/