linux-kernel - [GIT PULL] SLAB updates for 2.6.32-rc0

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.00.0909142025290.5303@melkki.cs.helsinki.fi>
Date:	Mon, 14 Sep 2009 20:46:18 +0300 (EEST)
From:	Pekka J Enberg <penberg@...helsinki.fi>
To:	torvalds@...ux-foundation.org
cc:	linux-kernel@...r.kernel.org, akpm@...ux-foundation.org,
	cl@...ux-foundation.org, aaro.koskinen@...ia.com,
	amwang@...hat.com, dfeng@...hat.com, eric.dumazet@...il.com,
	fengguang.wu@...el.com, Larry.Finger@...inger.net,
	rientjes@...gle.com, yanmin_zhang@...ux.intel.com,
	zdenek.kabelac@...il.com
Subject: [GIT PULL] SLAB updates for 2.6.32-rc0

Hi Linus,

Here's the usual batch of SLAB allocator fixes and cleanups accumulated 
over the past few months.

 			Pekka

The following changes since commit 0cc6d77e55eca9557bbe41bf2db94b31aa8fcb2a:
   Linus Torvalds (1):
         Merge branch 'x86-setup-for-linus' of git://git.kernel.org/.../tip/linux-2.6-tip

are available in the git repository at:

   ssh://master.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6 for-linus

Aaro Koskinen (1):
       SLUB: fix ARCH_KMALLOC_MINALIGN cases 64 and 256

Amerigo Wang (1):
       SLUB: Fix some coding style issues

David Rientjes (2):
       slub: add option to disable higher order debugging slabs
       slub: use size and objsize orders to disable debug flags

Eric Dumazet (1):
       slub: fix slab_pad_check()

Pekka Enberg (1):
       Merge branches 'slab/cleanups' and 'slab/fixes' into for-linus

WANG Cong (1):
       SLUB: Drop write permission to /proc/slabinfo

Wu Fengguang (1):
       slab: remove duplicate kmem_cache_init_late() declarations

Xiaotian Feng (1):
       slub: release kobject if sysfs_create_group failed in sysfs_slab_add

Zhang, Yanmin (1):
       slub: change kmem_cache->align to record the real alignment

  Documentation/vm/slub.txt |   10 +++++
  include/linux/slob_def.h  |    5 ---
  include/linux/slub_def.h  |    8 +---
  mm/slob.c                 |    5 +++
  mm/slub.c                 |   82 +++++++++++++++++++++++++++++++++++++--------
  5 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index bb1f5c6..510917f 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -41,6 +41,8 @@ Possible debug options are
  	P		Poisoning (object and padding)
  	U		User tracking (free and alloc)
  	T		Trace (please only use on single slabs)
+	O		Switch debugging off for caches that would have
+			caused higher minimum slab orders
  	-		Switch all debugging off (useful if the kernel is
  			configured with CONFIG_SLUB_DEBUG_ON)

@@ -59,6 +61,14 @@ to the dentry cache with

  	slub_debug=F,dentry

+Debugging options may require the minimum possible slab order to increase as
+a result of storing the metadata (for example, caches with PAGE_SIZE object
+sizes).  This has a higher likelihood of resulting in slab allocation errors
+in low memory situations or if there's high fragmentation of memory.  To
+switch off debugging for such caches by default, use
+
+	slub_debug=O
+
  In case you forgot to enable debugging on the kernel command line: It is
  possible to enable debugging manually when the kernel is up. Look at the
  contents of:
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index bb5368d..0ec00b3 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -34,9 +34,4 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags)
  	return kmalloc(size, flags);
  }

-static inline void kmem_cache_init_late(void)
-{
-	/* Nothing to do */
-}
-
  #endif /* __LINUX_SLOB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c1c862b..5ad70a6 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -153,12 +153,10 @@ static __always_inline int kmalloc_index(size_t size)
  	if (size <= KMALLOC_MIN_SIZE)
  		return KMALLOC_SHIFT_LOW;

-#if KMALLOC_MIN_SIZE <= 64
-	if (size > 64 && size <= 96)
+	if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
  		return 1;
-	if (size > 128 && size <= 192)
+	if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
  		return 2;
-#endif
  	if (size <=          8) return 3;
  	if (size <=         16) return 4;
  	if (size <=         32) return 5;
@@ -304,6 +302,4 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  }
  #endif

-void __init kmem_cache_init_late(void);
-
  #endif /* _LINUX_SLUB_DEF_H */
diff --git a/mm/slob.c b/mm/slob.c
index 9641da3..837ebd6 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -692,3 +692,8 @@ void __init kmem_cache_init(void)
  {
  	slob_ready = 1;
  }
+
+void __init kmem_cache_init_late(void)
+{
+	/* Nothing to do */
+}
diff --git a/mm/slub.c b/mm/slub.c
index b627675..417ed84 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -141,6 +141,13 @@
  				SLAB_POISON | SLAB_STORE_USER)

  /*
+ * Debugging flags that require metadata to be stored in the slab.  These get
+ * disabled when slub_debug=O is used and a cache's min order increases with
+ * metadata.
+ */
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+
+/*
   * Set of flags that will prevent slab merging
   */
  #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
@@ -325,6 +332,7 @@ static int slub_debug;
  #endif

  static char *slub_debug_slabs;
+static int disable_higher_order_debug;

  /*
   * Object debugging
@@ -646,7 +654,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
  	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
  	print_section("Padding", end - remainder, remainder);

-	restore_bytes(s, "slab padding", POISON_INUSE, start, end);
+	restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
  	return 0;
  }

@@ -976,6 +984,15 @@ static int __init setup_slub_debug(char *str)
  		 */
  		goto check_slabs;

+	if (tolower(*str) == 'o') {
+		/*
+		 * Avoid enabling debugging on a cache if its minimum order
+		 * would increase as a result.
+		 */
+		disable_higher_order_debug = 1;
+		goto out;
+	}
+
  	slub_debug = 0;
  	if (*str == '-')
  		/*
@@ -1026,8 +1043,8 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
  	 * Enable debugging if selected on the kernel commandline.
  	 */
  	if (slub_debug && (!slub_debug_slabs ||
-	    strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
-			flags |= slub_debug;
+		!strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+		flags |= slub_debug;

  	return flags;
  }
@@ -1109,8 +1126,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  	}

  	if (kmemcheck_enabled
-		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS)))
-	{
+		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
  		int pages = 1 << oo_order(oo);

  		kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
@@ -1560,6 +1576,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
  		"default order: %d, min order: %d\n", s->name, s->objsize,
  		s->size, oo_order(s->oo), oo_order(s->min));

+	if (oo_order(s->min) > get_order(s->objsize))
+		printk(KERN_WARNING "  %s debugging increased min order, use "
+		       "slub_debug=O to disable.\n", s->name);
+
  	for_each_online_node(node) {
  		struct kmem_cache_node *n = get_node(s, node);
  		unsigned long nr_slabs;
@@ -2001,7 +2021,7 @@ static inline int calculate_order(int size)
  				return order;
  			fraction /= 2;
  		}
-		min_objects --;
+		min_objects--;
  	}

  	/*
@@ -2400,6 +2420,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
  	 * on bootup.
  	 */
  	align = calculate_alignment(flags, align, s->objsize);
+	s->align = align;

  	/*
  	 * SLUB stores one object immediately after another beginning from
@@ -2452,6 +2473,18 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,

  	if (!calculate_sizes(s, -1))
  		goto error;
+	if (disable_higher_order_debug) {
+		/*
+		 * Disable debugging flags that store metadata if the min slab
+		 * order increased.
+		 */
+		if (get_order(s->size) > get_order(s->objsize)) {
+			s->flags &= ~DEBUG_METADATA_FLAGS;
+			s->offset = 0;
+			if (!calculate_sizes(s, -1))
+				goto error;
+		}
+	}

  	/*
  	 * The larger the object size is, the more pages we want on the partial
@@ -2790,6 +2823,11 @@ static s8 size_index[24] = {
  	2	/* 192 */
  };

+static inline int size_index_elem(size_t bytes)
+{
+	return (bytes - 1) / 8;
+}
+
  static struct kmem_cache *get_slab(size_t size, gfp_t flags)
  {
  	int index;
@@ -2798,7 +2836,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
  		if (!size)
  			return ZERO_SIZE_PTR;

-		index = size_index[(size - 1) / 8];
+		index = size_index[size_index_elem(size)];
  	} else
  		index = fls(size - 1);

@@ -3156,10 +3194,12 @@ void __init kmem_cache_init(void)
  	slab_state = PARTIAL;

  	/* Caches that are not of the two-to-the-power-of size */
-	if (KMALLOC_MIN_SIZE <= 64) {
+	if (KMALLOC_MIN_SIZE <= 32) {
  		create_kmalloc_cache(&kmalloc_caches[1],
  				"kmalloc-96", 96, GFP_NOWAIT);
  		caches++;
+	}
+	if (KMALLOC_MIN_SIZE <= 64) {
  		create_kmalloc_cache(&kmalloc_caches[2],
  				"kmalloc-192", 192, GFP_NOWAIT);
  		caches++;
@@ -3186,17 +3226,28 @@ void __init kmem_cache_init(void)
  	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
  		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

-	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
-		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
+	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
+		int elem = size_index_elem(i);
+		if (elem >= ARRAY_SIZE(size_index))
+			break;
+		size_index[elem] = KMALLOC_SHIFT_LOW;
+	}

-	if (KMALLOC_MIN_SIZE == 128) {
+	if (KMALLOC_MIN_SIZE == 64) {
+		/*
+		 * The 96 byte sized cache is not used if the alignment
+		 * is 64 byte. Redirect kmalloc to the 128 byte cache instead.
+		 */
+		for (i = 64 + 8; i <= 96; i += 8)
+			size_index[size_index_elem(i)] = 7;
+	} else if (KMALLOC_MIN_SIZE == 128) {
  		/*
  		 * The 192 byte sized cache is not used if the alignment
  		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
  		 * instead.
  		 */
  		for (i = 128 + 8; i <= 192; i += 8)
-			size_index[(i - 1) / 8] = 8;
+			size_index[size_index_elem(i)] = 8;
  	}

  	slab_state = UP;
@@ -4543,8 +4594,11 @@ static int sysfs_slab_add(struct kmem_cache *s)
  	}

  	err = sysfs_create_group(&s->kobj, &slab_attr_group);
-	if (err)
+	if (err) {
+		kobject_del(&s->kobj);
+		kobject_put(&s->kobj);
  		return err;
+	}
  	kobject_uevent(&s->kobj, KOBJ_ADD);
  	if (!unmergeable) {
  		/* Setup first alias */
@@ -4726,7 +4780,7 @@ static const struct file_operations proc_slabinfo_operations = {

  static int __init slab_proc_init(void)
  {
-	proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
+	proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
  	return 0;
  }
  module_init(slab_proc_init);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/