lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20090817204351.EEE0DDDB@kernel>
Date:	Mon, 17 Aug 2009 13:43:51 -0700
From:	Dave Hansen <dave@...ux.vnet.ibm.com>
To:	Dan Williams <dan.j.williams@...el.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	linux-kernel@...r.kernel.org, Dave Hansen <dave@...ux.vnet.ibm.com>
Subject: [RFC][PATCH] flex_array: conditionally optimize out divides


There are three flex_array operations that require divides:
1. figuring out into which "part" we should access
2. figuring out where into that part we fit
3. figuring out in how many elements fit into a part

Division can get expensive, and we may incur one or two
divides for each put() or get() that is performed.  If we
rounded the elements to a power-of-two and stored shifts
and masks, we could rid ourselves of the divides, but we
would lose storage space with oddly-sized objects.  We
could code the implementation to handle divides and special-
case the shifts when they can be used, but that would
complicate the code.

This is an alternative.  We introduce variants of
flex_array_get() and flex_array_put() since they are the
most common operations.  We append an _es() to their names
(for Element Size) and get flex_array_get_es() and
flex_array_put_es().  The allocation and free functions
remain unoptimized since they're not indended to be hot
paths.

Passing the element size into each operation, and using it
like this:

	flex_array_get(fa, nr, sizeof(struct my_struct));

lets the compiler turn the divides into shifts if 'my_struct'
is a power-of-two in size.

It seems that only gcc 4.1 and up are smart enough to figure
this out, though.

---

 linux-2.6.git-dave/include/linux/flex_array.h |   54 ++++++++++++++++++++++++++
 linux-2.6.git-dave/lib/flex_array.c           |   52 +++++++++++++------------
 2 files changed, 82 insertions(+), 24 deletions(-)

diff -puN include/linux/flex_array.h~precalc include/linux/flex_array.h
--- linux-2.6.git/include/linux/flex_array.h~precalc	2009-08-17 13:13:21.000000000 -0700
+++ linux-2.6.git-dave/include/linux/flex_array.h	2009-08-17 13:34:45.000000000 -0700
@@ -1,6 +1,7 @@
 #ifndef _FLEX_ARRAY_H
 #define _FLEX_ARRAY_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <asm/page.h>
 
@@ -36,6 +37,59 @@ struct flex_array {
 	.total_nr_elements = (total),	\
 } } }
 
+/*
+ * The 'unsigned' here seems to make gcc more happy
+ * to optimize the divide.
+ */
+static inline unsigned int __elements_per_part(int element_size)
+{
+	return FLEX_ARRAY_PART_SIZE / element_size;
+}
+
+static inline int __fa_element_to_part_nr(int element_size, int element_nr)
+{
+	return element_nr / __elements_per_part(element_size);
+}
+
+static inline int __fa_index_inside_part(int element_size, int element_nr)
+{
+	return element_nr % __elements_per_part(element_size);
+}
+
+int flex_array_put_precalc(struct flex_array *fa, int part_nr,
+			   int index_inside_part, void *src, gfp_t flags);
+void *flex_array_get_precalc(struct flex_array *fa, int part_nr,
+			     int index_inside_part);
+
+/*
+ * Use the _es() variants when you want the compiler to
+ * be able to optimize the divides like when you have a
+ * power-of-two element_size.
+ */
+static inline void *flex_array_get_es(struct flex_array *fa,
+		int element_nr, int element_size)
+{
+	int part_nr = __fa_element_to_part_nr(element_size, element_nr);
+	int index_inside = __fa_index_inside_part(element_size, element_nr);
+
+	if (element_nr >= fa->total_nr_elements)
+		return NULL;
+
+	return flex_array_get_precalc(fa, part_nr, index_inside);
+}
+
+static inline int flex_array_put_es(struct flex_array *fa, int element_nr,
+		int element_size, void *src, gfp_t flags)
+{
+	int part_nr = __fa_element_to_part_nr(element_size, element_nr);
+	int index_inside = __fa_index_inside_part(element_size, element_nr);
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+
+	return flex_array_put_precalc(fa, part_nr, index_inside, src, flags);
+}
+
 struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags);
 int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags);
 void flex_array_free(struct flex_array *fa);
diff -puN lib/flex_array.c~precalc lib/flex_array.c
--- linux-2.6.git/lib/flex_array.c~precalc	2009-08-17 13:13:21.000000000 -0700
+++ linux-2.6.git-dave/lib/flex_array.c	2009-08-17 13:41:24.000000000 -0700
@@ -28,11 +28,6 @@ struct flex_array_part {
 	char elements[FLEX_ARRAY_PART_SIZE];
 };
 
-static inline int __elements_per_part(int element_size)
-{
-	return FLEX_ARRAY_PART_SIZE / element_size;
-}
-
 static inline int bytes_left_in_base(void)
 {
 	int element_offset = offsetof(struct flex_array, parts);
@@ -115,11 +110,6 @@ struct flex_array *flex_array_alloc(int 
 	return ret;
 }
 
-static int fa_element_to_part_nr(struct flex_array *fa, int element_nr)
-{
-	return element_nr / __elements_per_part(fa->element_size);
-}
-
 /**
  * flex_array_free_parts - just free the second-level pages
  * @src:	address of data to copy into the array
@@ -151,12 +141,6 @@ static int fa_index_inside_part(struct f
 	return element_nr % __elements_per_part(fa->element_size);
 }
 
-static int index_inside_part(struct flex_array *fa, int element_nr)
-{
-	int part_offset = fa_index_inside_part(fa, element_nr);
-	return part_offset * fa->element_size;
-}
-
 static struct flex_array_part *
 __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
 {
@@ -176,6 +160,12 @@ __fa_get_part(struct flex_array *fa, int
 	return part;
 }
 
+
+static int fa_element_to_part_nr(struct flex_array *fa, int element_nr)
+{
+	return __fa_element_to_part_nr(fa->element_size, element_nr);
+}
+
 /**
  * flex_array_put - copy data into the array at @element_nr
  * @src:	address of data to copy into the array
@@ -186,23 +176,30 @@ __fa_get_part(struct flex_array *fa, int
  * the array.  If you are trying to store an array of
  * pointers, make sure to pass in &ptr instead of ptr.
  *
+ * The _es() variant is if you want to pass in the element
+ * size yourself.  This allows the compiler to do some more
+ * optimization and possibly eliminate all the divides.
+ *
  * Locking must be provided by the caller.
  */
 int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags)
 {
-	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	return flex_array_put_es(fa, element_nr, fa->element_size, src, flags);
+}
+
+int flex_array_put_precalc(struct flex_array *fa, int part_nr,
+			   int index_inside_part, void *src, gfp_t flags)
+{
 	struct flex_array_part *part;
 	void *dst;
 
-	if (element_nr >= fa->total_nr_elements)
-		return -ENOSPC;
 	if (elements_fit_in_base(fa))
 		part = (struct flex_array_part *)&fa->parts[0];
 	else
 		part = __fa_get_part(fa, part_nr, flags);
 	if (!part)
 		return -ENOMEM;
-	dst = &part->elements[index_inside_part(fa, element_nr)];
+	dst = &part->elements[index_inside_part];
 	memcpy(dst, src, fa->element_size);
 	return 0;
 }
@@ -248,20 +245,27 @@ int flex_array_prealloc(struct flex_arra
  * that this is a copy of the data that was passed in.  If you
  * are using this to store pointers, you'll get back &ptr.
  *
+ * The _es() variant is if you want to pass in the element
+ * size yourself.  This allows the compiler to do some more
+ * optimization and possibly eliminate all the divides.
+ *
  * Locking must be provided by the caller.
  */
 void *flex_array_get(struct flex_array *fa, int element_nr)
 {
-	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	return flex_array_get_es(fa, element_nr, fa->element_size);
+}
+
+void *flex_array_get_precalc(struct flex_array *fa, int part_nr,
+			     int index_inside_part)
+{
 	struct flex_array_part *part;
 
-	if (element_nr >= fa->total_nr_elements)
-		return NULL;
 	if (!fa->parts[part_nr])
 		return NULL;
 	if (elements_fit_in_base(fa))
 		part = (struct flex_array_part *)&fa->parts[0];
 	else
 		part = fa->parts[part_nr];
-	return &part->elements[index_inside_part(fa, element_nr)];
+	return &part->elements[index_inside_part];
 }
diff -L include/linux/flex_array.hcd -puN /dev/null /dev/null
_
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ