Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to them.

Care is taken to only touch the SLUB slow path.

Because the reserve threshold is system wide (by virtue of the previous
patches) we can make do with a single kmem_cache-wide state.

Signed-off-by: Peter Zijlstra
Cc: Christoph Lameter
---
 include/linux/slub_def.h |    2 +
 mm/slub.c                |   75 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 7 deletions(-)

Index: linux-2.6-2/include/linux/slub_def.h
===================================================================
--- linux-2.6-2.orig/include/linux/slub_def.h
+++ linux-2.6-2/include/linux/slub_def.h
@@ -50,6 +50,8 @@ struct kmem_cache {
 	struct kobject kobj;	/* For sysfs */
 #endif
 
+	struct page *reserve_slab;
+
 #ifdef CONFIG_NUMA
 	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,13 @@
 #include
 #include
 #include
+#include "internal.h"
 
 /*
  * Lock order:
- *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   1. reserve_lock
+ *   2. slab_lock(page)
+ *   3. node->list_lock
  *
  * The slab_lock protects operations on the object of a particular
  * slab and its metadata in the page struct. If the slab lock
@@ -258,6 +260,8 @@ static inline int sysfs_slab_alias(struc
 static inline void sysfs_slab_remove(struct kmem_cache *s) {}
 #endif
 
+static DEFINE_SPINLOCK(reserve_lock);
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -1069,7 +1073,7 @@ static void setup_object(struct kmem_cac
 		s->ctor(object, s, 0);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1087,6 +1091,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
@@ -1457,6 +1462,7 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	int cpu = smp_processor_id();
+	int reserve = 0;
 
 	if (!page)
 		goto new_slab;
@@ -1486,10 +1492,25 @@ new_slab:
 	if (page) {
 		s->cpu_slab[cpu] = page;
 		goto load_freelist;
-	}
+	} else if (unlikely(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+		goto try_reserve;
 
-	page = new_slab(s, gfpflags, node);
-	if (page) {
+alloc_slab:
+	page = new_slab(s, gfpflags, node, &reserve);
+	if (page && !reserve) {
+		if (unlikely(s->reserve_slab)) {
+			struct page *reserve;
+
+			spin_lock(&reserve_lock);
+			reserve = s->reserve_slab;
+			s->reserve_slab = NULL;
+			spin_unlock(&reserve_lock);
+
+			if (reserve) {
+				slab_lock(reserve);
+				unfreeze_slab(s, reserve);
+			}
+		}
 		cpu = smp_processor_id();
 		if (s->cpu_slab[cpu]) {
 			/*
@@ -1517,6 +1538,18 @@ new_slab:
 		SetSlabFrozen(page);
 		s->cpu_slab[cpu] = page;
 		goto load_freelist;
+	} else if (page) {
+		spin_lock(&reserve_lock);
+		if (s->reserve_slab) {
+			discard_slab(s, page);
+			page = s->reserve_slab;
+			goto got_reserve;
+		}
+		slab_lock(page);
+		SetSlabFrozen(page);
+		s->reserve_slab = page;
+		spin_unlock(&reserve_lock);
+		goto use_reserve;
 	}
 	return NULL;
 debug:
@@ -1528,6 +1561,31 @@ debug:
 	page->freelist = object[page->offset];
 	slab_unlock(page);
 	return object;
+
+try_reserve:
+	spin_lock(&reserve_lock);
+	page = s->reserve_slab;
+	if (!page) {
+		spin_unlock(&reserve_lock);
+		goto alloc_slab;
+	}
+
+got_reserve:
+	slab_lock(page);
+	if (!page->freelist) {
+		s->reserve_slab = NULL;
+		spin_unlock(&reserve_lock);
+		unfreeze_slab(s, page);
+		goto alloc_slab;
+	}
+	spin_unlock(&reserve_lock);
+
+use_reserve:
+	object = page->freelist;
+	page->inuse++;
+	page->freelist = object[page->offset];
+	slab_unlock(page);
+	return object;
 }
 
 /*
@@ -1872,10 +1930,11 @@ static struct kmem_cache_node * __init e
 {
 	struct page *page;
 	struct kmem_cache_node *n;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);
 
 	BUG_ON(!page);
 	n = page->freelist;
@@ -2091,6 +2150,8 @@ static int kmem_cache_open(struct kmem_c
 	s->defrag_ratio = 100;
#endif
 
+	s->reserve_slab = NULL;
+
 	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		return 1;
 error:
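
For reference, here is a small userspace sketch of the entitlement decision the
slow path now makes: objects come out of the single cache-wide reserve slab only
when gfp_to_alloc_flags() maps the caller's gfp mask to ALLOC_NO_WATERMARKS.
The types, flag values (e.g. the __GFP_MEMALLOC placeholder bit) and helpers
below are simplified stand-ins invented for illustration, not the kernel's;
only the decision structure follows the patch.

/*
 * Userspace sketch (illustration only): how the slow path decides whether
 * a caller may take objects from the cache-wide reserve slab.  All types,
 * flag values and helpers are simplified stand-ins, not the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NO_WATERMARKS	0x01	/* allocation may ignore watermarks */
#define __GFP_MEMALLOC		0x02	/* placeholder: caller entitled to reserves */

struct slab {
	int free_objects;
	bool from_reserve;	/* backing page came from below the watermarks */
};

struct kmem_cache {
	struct slab *cpu_slab;		/* normal slab (per cpu in the kernel) */
	struct slab *reserve_slab;	/* single kmem_cache-wide reserve slab */
};

/* Stand-in for the kernel's gfp_to_alloc_flags(). */
static int gfp_to_alloc_flags(int gfpflags)
{
	return (gfpflags & __GFP_MEMALLOC) ? ALLOC_NO_WATERMARKS : 0;
}

/*
 * Slow-path sketch: hand out an "object" (here just the slab it came from)
 * and refuse to touch the reserve slab unless the context is entitled to
 * it.  The real code would instead try new_slab() here, and only park a
 * page on s->reserve_slab when it was allocated from the reserves.
 */
static void *slab_alloc_slowpath(struct kmem_cache *s, int gfpflags)
{
	if (s->cpu_slab && s->cpu_slab->free_objects) {
		s->cpu_slab->free_objects--;
		return s->cpu_slab;
	}

	if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
		return NULL;			/* not entitled: leave the reserve alone */

	if (s->reserve_slab && s->reserve_slab->free_objects) {
		s->reserve_slab->free_objects--;
		return s->reserve_slab;		/* entitled: dip into the reserve */
	}
	return NULL;
}

int main(void)
{
	struct slab reserve = { .free_objects = 4, .from_reserve = true };
	struct kmem_cache cache = { .cpu_slab = NULL, .reserve_slab = &reserve };

	/* An ordinary allocation fails rather than deplete the reserve. */
	printf("ordinary alloc: %p\n", slab_alloc_slowpath(&cache, 0));
	/* An entitled (PF_MEMALLOC-like) context gets a reserve object. */
	printf("entitled alloc: %p\n", slab_alloc_slowpath(&cache, __GFP_MEMALLOC));
	return 0;
}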