lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1187595513.6114.176.camel@twins>
Date:	Mon, 20 Aug 2007 09:38:33 +0200
From:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
To:	linux-kernel@...r.kernel.org
Cc:	linux-mm@...ck.org, David Miller <davem@...emloft.net>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Daniel Phillips <phillips@...gle.com>,
	Pekka Enberg <penberg@...helsinki.fi>,
	Christoph Lameter <clameter@....com>,
	Matt Mackall <mpm@...enic.com>,
	Lee Schermerhorn <Lee.Schermerhorn@...com>,
	Steve Dickson <SteveD@...hat.com>
Subject: Re: [PATCH 04/10] mm: slub: add knowledge of reserve pages

OK, so I got rid of the global stuff; this also makes patch 03/10 obsolete.


---
Subject: mm: slub: add knowledge of reserve pages

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to them.

Care is taken to only touch the SLUB slow path.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Christoph Lameter <clameter@....com>
---
 mm/slub.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 18 deletions(-)

Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,12 @@
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
+#include "internal.h"
 
 /*
  * Lock order:
  *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   2. node->list_lock
  *
  *   The slab_lock protects operations on the object of a particular
  *   slab and its metadata in the page struct. If the slab lock
@@ -1069,7 +1070,7 @@ static void setup_object(struct kmem_cac
 		s->ctor(object, s, 0);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1087,6 +1088,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
@@ -1403,12 +1405,36 @@ static inline void flush_slab(struct kme
 }
 
 /*
+ * cpu slab reserve magic
+ *
+ * we mark reserve status in the lsb of the ->cpu_slab[] pointer.
+ */
+static inline unsigned long cpu_slab_reserve(struct kmem_cache *s, int cpu)
+{
+	return unlikely((unsigned long)s->cpu_slab[cpu] & 1);
+}
+
+static inline void
+cpu_slab_set(struct kmem_cache *s, int cpu, struct page *page, int reserve)
+{
+	if (unlikely(reserve))
+		page = (struct page *)((unsigned long)page | 1);
+
+	s->cpu_slab[cpu] = page;
+}
+
+static inline struct page *cpu_slab(struct kmem_cache *s, int cpu)
+{
+	return (struct page *)((unsigned long)s->cpu_slab[cpu] & ~1UL);
+}
+
+/*
  * Flush cpu slab.
  * Called from IPI handler with interrupts disabled.
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct page *page = cpu_slab(s, cpu);
 
 	if (likely(page))
 		flush_slab(s, page, cpu);
@@ -1457,10 +1483,22 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	int cpu = smp_processor_id();
+	int reserve = 0;
 
 	if (!page)
 		goto new_slab;
 
+	if (cpu_slab_reserve(s, cpu)) {
+		/*
+		 * If the current slab is a reserve slab and the current
+		 * allocation context does not allow access to the reserves
+		 * we must force an allocation to test the current levels.
+		 */
+		if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+			goto alloc_slab;
+		reserve = 1;
+	}
+
 	slab_lock(page);
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
@@ -1468,10 +1506,9 @@ load_freelist:
 	object = page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(page)))
+	if (unlikely(SlabDebug(page) || reserve))
 		goto debug;
 
-	object = page->freelist;
 	page->lockless_freelist = object[page->offset];
 	page->inuse = s->objects;
 	page->freelist = NULL;
@@ -1484,14 +1521,28 @@ another_slab:
 new_slab:
 	page = get_partial(s, gfpflags, node);
 	if (page) {
-		s->cpu_slab[cpu] = page;
+		cpu_slab_set(s, cpu, page, reserve);
 		goto load_freelist;
 	}
 
-	page = new_slab(s, gfpflags, node);
+alloc_slab:
+	page = new_slab(s, gfpflags, node, &reserve);
 	if (page) {
+		struct page *slab;
+
 		cpu = smp_processor_id();
-		if (s->cpu_slab[cpu]) {
+		slab = cpu_slab(s, cpu);
+
+		if (cpu_slab_reserve(s, cpu) && !reserve) {
+			/*
+			 * If the current cpu_slab is a reserve slab but we
+			 * managed to allocate a new slab the pressure is
+			 * lifted and we can unmark the current one.
+			 */
+			cpu_slab_set(s, cpu, slab, 0);
+		}
+
+		if (slab) {
 			/*
 			 * Someone else populated the cpu_slab while we
 			 * enabled interrupts, or we have gotten scheduled
@@ -1499,29 +1550,28 @@ new_slab:
 			 * requested node even if __GFP_THISNODE was
 			 * specified. So we need to recheck.
 			 */
-			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+			if (node == -1 || page_to_nid(slab) == node) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
 				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
+				page = slab;
 				slab_lock(page);
 				goto load_freelist;
 			}
 			/* New slab does not fit our expectations */
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, slab, cpu);
 		}
 		slab_lock(page);
 		SetSlabFrozen(page);
-		s->cpu_slab[cpu] = page;
+		cpu_slab_set(s, cpu, page, reserve);
 		goto load_freelist;
 	}
 	return NULL;
 debug:
-	object = page->freelist;
-	if (!alloc_debug_processing(s, page, object, addr))
+	if (SlabDebug(page) &&
+			!alloc_debug_processing(s, page, object, addr))
 		goto another_slab;
 
 	page->inuse++;
@@ -1548,7 +1598,7 @@ static void __always_inline *slab_alloc(
 	unsigned long flags;
 
 	local_irq_save(flags);
-	page = s->cpu_slab[smp_processor_id()];
+	page = cpu_slab(s, smp_processor_id());
 	if (unlikely(!page || !page->lockless_freelist ||
 			(node != -1 && page_to_nid(page) != node)))
 
@@ -1873,10 +1923,11 @@ static struct kmem_cache_node * __init e
 {
 	struct page *page;
 	struct kmem_cache_node *n;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);
 
 	BUG_ON(!page);
 	n = page->freelist;
@@ -3189,7 +3240,7 @@ static unsigned long slab_objects(struct
 	per_cpu = nodes + nr_node_ids;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->cpu_slab[cpu];
+		struct page *page = cpu_slab(s, cpu);
 		int node;
 
 		if (page) {


Download attachment "signature.asc" of type "application/pgp-signature" (190 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ