lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 8 Aug 2011 13:42:52 +0200
From:	Joakim Tjernlund <joakim.tjernlund@...nsmode.se>
To:	"George Spelvin" <linux@...izon.com>
Cc:	akpm@...ux-foundation.org, fzago@...temfabricworks.com,
	linux-kernel@...r.kernel.org, rpearson@...temfabricworks.com
Subject: Re: [PATCH] add slice by 8 algorithm to crc32.c

"George Spelvin" <linux@...izon.com> wrote on 2011/08/08 12:52:01:
>
> > I prefer to keep the current code which (at the time) generated good code
> > for at least ppc:
> >    /* Align it */
> >    if (unlikely((long)buf & 3 && len)) {
> >       do {
> >          DO_CRC(*buf++);
> >       } while ((--len) && ((long)buf)&3);
> >    }
>
> Ah, I was looking at fzago's initial patch; I hadn't realized you'd
> tweaked it.  That's pretty much what I was talking about.
>
> Would
>     if (unlikely((long)buf & 3) && len) {
>
> give the compiler better hints?  len != 0 is awfully
> likely, actually.

This is my current hack. Not ideal yet, just to give you a hint:
diff --git a/lib/crc32.c b/lib/crc32.c
index b06d1e7..d822ceb 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -28,13 +28,13 @@
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include "crc32defs.h"
-#if CRC_LE_BITS == 8
+#if CRC_LE_BITS == 32 || CRC_LE_BITS == 64
 # define tole(x) __constant_cpu_to_le32(x)
 #else
 # define tole(x) (x)
 #endif

-#if CRC_BE_BITS == 8
+#if CRC_BE_BITS == 32 || CRC_BE_BITS == 64
 # define tobe(x) __constant_cpu_to_be32(x)
 #else
 # define tobe(x) (x)
@@ -45,27 +45,32 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@...l.com>");
 MODULE_DESCRIPTION("Ethernet CRC32 calculations");
 MODULE_LICENSE("GPL");

-#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
+#if CRC_LE_BITS == 32 || CRC_BE_BITS == 64

 static inline u32
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
 #  define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
-#  define DO_CRC4 crc = t3[(crc) & 255] ^ \
-		t2[(crc >> 8) & 255] ^ \
-		t1[(crc >> 16) & 255] ^ \
-		t0[(crc >> 24) & 255]
+#  define DO_CRC4(crc, x0, x1, x2, x3) \
+	x3[(crc) & 255] ^		\
+		x2[(crc >> 8) & 255] ^	\
+		x1[(crc >> 16) & 255] ^ \
+		x0[(crc >> 24) & 255]
 # else
 #  define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
-#  define DO_CRC4 crc = t0[(crc) & 255] ^ \
-		t1[(crc >> 8) & 255] ^  \
-		t2[(crc >> 16) & 255] ^	\
-		t3[(crc >> 24) & 255]
+#  define DO_CRC4(crc, x0, x1, x2, x3) \
+	x0[(crc) & 255] ^		\
+		x1[(crc >> 8) & 255] ^  \
+		x2[(crc >> 16) & 255] ^	\
+		x3[(crc >> 24) & 255]
 # endif
 	const u32 *b;
 	size_t    rem_len;
 	const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
+#if CRC_LE_BITS == 64 || CRC_BE_BITS == 64
+	const u32 *t4=tab[4], *t5=tab[5], *t6=tab[6], *t7=tab[7];
+#endif

 	/* Align it */
 	if (unlikely((long)buf & 3 && len)) {
@@ -73,13 +78,25 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 			DO_CRC(*buf++);
 		} while ((--len) && ((long)buf)&3);
 	}
+#if CRC_LE_BITS == 64 || CRC_BE_BITS == 64
+	rem_len = len & 7;
+	len = len >> 3;
+#else
 	rem_len = len & 3;
-	/* load data 32 bits wide, xor data 32 bits wide. */
 	len = len >> 2;
+#endif
+	/* load data 32 bits wide, xor data 32 bits wide. */
 	b = (const u32 *)buf;
 	for (--b; len; --len) {
 		crc ^= *++b; /* use pre increment for speed */
-		DO_CRC4;
+#if CRC_LE_BITS == 64 || CRC_BE_BITS == 64
+		crc = DO_CRC4(crc, t4, t5, t6, t7);
+		++b;
+		crc ^= DO_CRC4(*b, t0, t1, t2, t3);
+#else
+		crc = DO_CRC4(crc, t0, t1, t2, t3);
+#endif
+
 	}
 	len = rem_len;
 	/* And the last few bytes */
@@ -123,7 +140,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)

 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
-# if CRC_LE_BITS == 8
+# if CRC_LE_BITS == 32 || CRC_LE_BITS == 64
 	const u32      (*tab)[] = crc32table_le;

 	crc = __cpu_to_le32(crc);
@@ -132,17 +149,17 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 # elif CRC_LE_BITS == 4
 	while (len--) {
 		crc ^= *p++;
-		crc = (crc >> 4) ^ crc32table_le[crc & 15];
-		crc = (crc >> 4) ^ crc32table_le[crc & 15];
+		crc = (crc >> 4) ^ crc32table_le[0][crc & 15];
+		crc = (crc >> 4) ^ crc32table_le[0][crc & 15];
 	}
 	return crc;
 # elif CRC_LE_BITS == 2
 	while (len--) {
 		crc ^= *p++;
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
+		crc = (crc >> 2) ^ crc32table_le[0][crc & 3];
+		crc = (crc >> 2) ^ crc32table_le[0][crc & 3];
+		crc = (crc >> 2) ^ crc32table_le[0][crc & 3];
+		crc = (crc >> 2) ^ crc32table_le[0][crc & 3];
 	}
 	return crc;
 # endif
@@ -180,7 +197,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 #else				/* Table-based approach */
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
-# if CRC_BE_BITS == 8
+# if CRC_BE_BITS == 32 || CRC_BE_BITS == 64
 	const u32      (*tab)[] = crc32table_be;

 	crc = __cpu_to_be32(crc);
@@ -189,17 +206,17 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 # elif CRC_BE_BITS == 4
 	while (len--) {
 		crc ^= *p++ << 24;
-		crc = (crc << 4) ^ crc32table_be[crc >> 28];
-		crc = (crc << 4) ^ crc32table_be[crc >> 28];
+		crc = (crc << 4) ^ crc32table_be[0][crc >> 28];
+		crc = (crc << 4) ^ crc32table_be[0][crc >> 28];
 	}
 	return crc;
 # elif CRC_BE_BITS == 2
 	while (len--) {
 		crc ^= *p++ << 24;
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
+		crc = (crc << 2) ^ crc32table_be[0][crc >> 30];
+		crc = (crc << 2) ^ crc32table_be[0][crc >> 30];
+		crc = (crc << 2) ^ crc32table_be[0][crc >> 30];
+		crc = (crc << 2) ^ crc32table_be[0][crc >> 30];
 	}
 	return crc;
 # endif
diff --git a/lib/crc32defs.h b/lib/crc32defs.h
index 9b6773d..c5fa3eb 100644
--- a/lib/crc32defs.h
+++ b/lib/crc32defs.h
@@ -8,25 +8,25 @@

 /* How many bits at a time to use.  Requires a table of 4<<CRC_xx_BITS bytes. */
 /* For less performance-sensitive, use 4 */
-#ifndef CRC_LE_BITS
-# define CRC_LE_BITS 8
+#ifndef CRC_LE_BITS
+# define CRC_LE_BITS 32
 #endif
 #ifndef CRC_BE_BITS
-# define CRC_BE_BITS 8
+# define CRC_BE_BITS 32
 #endif

 /*
  * Little-endian CRC computation.  Used with serial bit streams sent
  * lsbit-first.  Be sure to use cpu_to_le32() to append the computed CRC.
  */
-#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1
-# error CRC_LE_BITS must be a power of 2 between 1 and 8
+#if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1
+# error CRC_LE_BITS must be a power of 2 between 1 and 64
 #endif

 /*
  * Big-endian CRC computation.  Used with serial bit streams sent
  * msbit-first.  Be sure to use cpu_to_be32() to append the computed CRC.
  */
-#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1
+#if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1
 # error CRC_BE_BITS must be a power of 2 between 1 and 8
 #endif
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 85d0e41..16336eb 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -4,11 +4,22 @@

 #define ENTRIES_PER_LINE 4

-#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
-#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
+#if CRC_LE_BITS > 8
+# define LE_TABLE_SIZE 256
+#else
+# define LE_TABLE_SIZE (1 << CRC_LE_BITS)
+#endif
+#if CRC_BE_BITS > 8
+# define BE_TABLE_SIZE 256
+#else
+# define BE_TABLE_SIZE (1 << CRC_BE_BITS)
+#endif

-static uint32_t crc32table_le[4][LE_TABLE_SIZE];
-static uint32_t crc32table_be[4][BE_TABLE_SIZE];
+#define LE_TABLE_ROWS ((CRC_LE_BITS - 1)/8 + 1)
+#define BE_TABLE_ROWS ((CRC_BE_BITS - 1)/8 + 1)
+
+static uint32_t crc32table_le[LE_TABLE_ROWS][LE_TABLE_SIZE];
+static uint32_t crc32table_be[BE_TABLE_ROWS][BE_TABLE_SIZE];

 /**
  * crc32init_le() - allocate and initialize LE table data
@@ -24,14 +35,14 @@ static void crc32init_le(void)

 	crc32table_le[0][0] = 0;

-	for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
+	for (i = LE_TABLE_SIZE/2; i; i >>= 1) {
 		crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
 		for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
 			crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
 	}
 	for (i = 0; i < LE_TABLE_SIZE; i++) {
 		crc = crc32table_le[0][i];
-		for (j = 1; j < 4; j++) {
+		for (j = 1; j < LE_TABLE_ROWS; j++) {
 			crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
 			crc32table_le[j][i] = crc;
 		}
@@ -55,43 +66,47 @@ static void crc32init_be(void)
 	}
 	for (i = 0; i < BE_TABLE_SIZE; i++) {
 		crc = crc32table_be[0][i];
-		for (j = 1; j < 4; j++) {
+		for (j = 1; j < BE_TABLE_ROWS; j++) {
 			crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
 			crc32table_be[j][i] = crc;
 		}
 	}
 }

-static void output_table(uint32_t table[4][256], int len, char *trans)
+static void output_table(uint32_t table[], int len, char *trans)
 {
-	int i, j;
-
-	for (j = 0 ; j < 4; j++) {
-		printf("{");
-		for (i = 0; i < len - 1; i++) {
-			if (i % ENTRIES_PER_LINE == 0)
-				printf("\n");
-			printf("%s(0x%8.8xL), ", trans, table[j][i]);
-		}
-		printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
+	int i;
+
+	printf("{");
+	for (i = 0; i < len - 1; i++) {
+		if (i % ENTRIES_PER_LINE == 0)
+			printf("\n");
+		printf("%s(0x%8.8xL), ", trans, table[i]);
 	}
+	printf("%s(0x%8.8xL)},\n", trans, table[len - 1]);
 }

 int main(int argc, char** argv)
 {
+	int i;
+
 	printf("/* this file is generated - do not edit */\n\n");

 	if (CRC_LE_BITS > 1) {
 		crc32init_le();
-		printf("static const u32 crc32table_le[4][256] = {");
-		output_table(crc32table_le, LE_TABLE_SIZE, "tole");
+		printf("static const u32 crc32table_le[%d][%d] = {",
+		       LE_TABLE_ROWS, LE_TABLE_SIZE);
+		for (i = 0 ; i < LE_TABLE_ROWS; i++)
+			output_table(crc32table_le[i], LE_TABLE_SIZE, "tole");
 		printf("};\n");
 	}

 	if (CRC_BE_BITS > 1) {
 		crc32init_be();
-		printf("static const u32 crc32table_be[4][256] = {");
-		output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
+		printf("static const u32 crc32table_be[%d][%d] = {",
+		       BE_TABLE_ROWS, BE_TABLE_SIZE);
+		for (i = 0 ; i < BE_TABLE_ROWS; i++)
+			output_table(crc32table_be[i], BE_TABLE_SIZE, "tobe");
 		printf("};\n");
 	}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ