lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:	Wed, 20 Feb 2013 09:02:59 -0800
From:	Davidlohr Bueso <davidlohr.bueso@...com>
To:	Andrew Morton <akpm@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org, jgonzalez@...ets.cl
Subject: [PATCH] lib/int_sqrt: optimize square root algorithm

From: Davidlohr Bueso <davidlohr.bueso@...com>

This patch optimizes the current version of the shift-and-subtract
(hardware) algorithm, described by John von Newmann[1] and Guy L. Steele.

Iterating 1,000,000 times, perf shows for the current version:

 Performance counter stats for './sqrt-curr' (10 runs):

         27.170996 task-clock                #    0.979 CPUs utilized            ( +-  3.19% )
                 3 context-switches          #    0.103 K/sec                    ( +-  4.76% )
                 0 cpu-migrations            #    0.004 K/sec                    ( +-100.00% )
               104 page-faults               #    0.004 M/sec                    ( +-  0.16% )
        64,921,199 cycles                    #    2.389 GHz                      ( +-  0.03% )
        28,967,789 stalled-cycles-frontend   #   44.62% frontend cycles idle     ( +-  0.18% )
   <not supported> stalled-cycles-backend
       104,502,623 instructions              #    1.61  insns per cycle
                                             #    0.28  stalled cycles per insn  ( +-  0.00% )
        34,088,368 branches                  # 1254.587 M/sec                    ( +-  0.00% )
             4,901 branch-misses             #    0.01% of all branches          ( +-  1.32% )

       0.027763015 seconds time elapsed                                          ( +-  3.22% )

And for the new version:

Performance counter stats for './sqrt-new' (10 runs):

          0.496869 task-clock                #    0.519 CPUs utilized            ( +-  2.38% )
                 0 context-switches          #    0.000 K/sec
                 0 cpu-migrations            #    0.403 K/sec                    ( +-100.00% )
               104 page-faults               #    0.209 M/sec                    ( +-  0.15% )
           590,760 cycles                    #    1.189 GHz                      ( +-  2.35% )
           395,053 stalled-cycles-frontend   #   66.87% frontend cycles idle     ( +-  3.67% )
   <not supported> stalled-cycles-backend
           398,963 instructions              #    0.68  insns per cycle
                                             #    0.99  stalled cycles per insn  ( +-  0.39% )
            70,228 branches                  #  141.341 M/sec                    ( +-  0.36% )
             3,364 branch-misses             #    4.79% of all branches          ( +-  5.45% )

       0.000957440 seconds time elapsed                                          ( +-  2.42% )

Furthermore, this saves space in instruction text:

   text    data     bss     dec     hex filename
    111       0       0     111      6f lib/int_sqrt-baseline.o
     89       0       0      89      59 lib/int_sqrt.o

[1] http://en.wikipedia.org/wiki/First_Draft_of_a_Report_on_the_EDVAC

Signed-off-by: Davidlohr Bueso <davidlohr.bueso@...com>
Reviewed-and-tested-by: Jonathan Gonzalez <jgonzlez@...ets.cl>
---
 lib/int_sqrt.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
index fc2eeb7..1ef4cc3 100644
--- a/lib/int_sqrt.c
+++ b/lib/int_sqrt.c
@@ -1,3 +1,9 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@...com>
+ *
+ *  Based on the shift-and-subtract algorithm for computing integer
+ *  square root from Guy L. Steele.
+ */
 
 #include <linux/kernel.h>
 #include <linux/export.h>
@@ -10,23 +16,23 @@
  */
 unsigned long int_sqrt(unsigned long x)
 {
-	unsigned long op, res, one;
+	unsigned long b, m, y = 0;
 
-	op = x;
-	res = 0;
+	if (x <= 1)
+		return x;
 
-	one = 1UL << (BITS_PER_LONG - 2);
-	while (one > op)
-		one >>= 2;
+	m = 1UL << (BITS_PER_LONG - 2);
+	while (m != 0) {
+		b = y + m;
+		y >>= 1;
 
-	while (one != 0) {
-		if (op >= res + one) {
-			op = op - (res + one);
-			res = res +  2 * one;
+		if (x >= b) {
+			x -= b;
+			y += m;
 		}
-		res /= 2;
-		one /= 4;
+		m >>= 2;
 	}
-	return res;
+
+	return y;
 }
 EXPORT_SYMBOL(int_sqrt);
-- 
1.7.11.7



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ