lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 09 Aug 2011 01:58:23 -0700
From:	Joe Perches <joe@...ches.com>
To:	Mandeep Singh Baines <msb@...omium.org>
Cc:	linux-kernel@...r.kernel.org,
	Ramsay Jones <ramsay@...say1.demon.co.uk>,
	Nicolas Pitre <nico@...xnic.net>,
	Joachim Eastwood <manabian@...il.com>,
	Andreas Schwab <schwab@...ux-m68k.org>,
	Herbert Xu <herbert@...dor.hengli.com.au>,
	"David S. Miller" <davem@...emloft.net>,
	linux-crypto@...r.kernel.org,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Geert Uytterhoeven <geert@...ux-m68k.org>
Subject: [PATCH] treewide: Update sha_transform

Move the workspace into sha_transform as a local struct on the stack.

Remove #define SHA_WORKSPACE_WORDS.
Remove workspace argument from sha_transform.
Convert uses of __u8 * to void * in sha_transform.
Eliminate possible sha_transform unaligned accesses to data by copying
data to an aligned __u32 array if necessary.
Add sha_transform wipe argument to force workspace clearing if desired.
A little macro neatening.

This should speed up network syncookies by a trivial amount.

Add #include <linux/cryptohash.h> to lib/sha1.c

Compiled, but otherwise untested.

Signed-off-by: Joe Perches <joe@...ches.com>
---

On Mon, 2011-08-08 at 22:52 -0700, Mandeep Singh Baines wrote:
> We don't call sha_transform directly. We use crypto_hash_digest. So maybe
> add a wipe param there. I'm happy to work on or test such a patch if folks
> think it's interesting. It saves me 190 ms on a 6 second boot. I suspect
> there may be other hash intense applications that also don't need secrecy.
 
Well, here's the patch I produced.

 crypto/sha1_generic.c      |    5 +---
 drivers/char/random.c      |    7 ++---
 include/linux/cryptohash.h |    3 +-
 lib/sha1.c                 |   61 +++++++++++++++++++++++++++++++-------------
 net/ipv4/syncookies.c      |    5 +--
 net/ipv4/tcp_output.c      |    6 +---
 net/ipv6/syncookies.c      |    5 +--
 7 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 00ae60e..d0c3f4a 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -49,8 +49,6 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
 	src = data;
 
 	if ((partial + len) >= SHA1_BLOCK_SIZE) {
-		u32 temp[SHA_WORKSPACE_WORDS];
-
 		if (partial) {
 			done = -partial;
 			memcpy(sctx->buffer + partial, data,
@@ -59,12 +57,11 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
 		}
 
 		do {
-			sha_transform(sctx->state, src, temp);
+			sha_transform(sctx->state, src, true);
 			done += SHA1_BLOCK_SIZE;
 			src = data + done;
 		} while (done + SHA1_BLOCK_SIZE <= len);
 
-		memset(temp, 0, sizeof(temp));
 		partial = 0;
 	}
 	memcpy(sctx->buffer + partial, src, len - done);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c35a785..6b9e5dc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -816,13 +816,13 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
 static void extract_buf(struct entropy_store *r, __u8 *out)
 {
 	int i;
-	__u32 hash[5], workspace[SHA_WORKSPACE_WORDS];
+	__u32 hash[5];
 	__u8 extract[64];
 
 	/* Generate a hash across the pool, 16 words (512 bits) at a time */
 	sha_init(hash);
 	for (i = 0; i < r->poolinfo->poolwords; i += 16)
-		sha_transform(hash, (__u8 *)(r->pool + i), workspace);
+		sha_transform(hash, r->pool + i, false);
 
 	/*
 	 * We mix the hash back into the pool to prevent backtracking
@@ -839,9 +839,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	 * To avoid duplicates, we atomically extract a portion of the
 	 * pool while mixing, and hash one final time.
 	 */
-	sha_transform(hash, extract, workspace);
+	sha_transform(hash, extract, true);
 	memset(extract, 0, sizeof(extract));
-	memset(workspace, 0, sizeof(workspace));
 
 	/*
 	 * In case the hash function has some recognizable output
diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index 2cd9f1c..c64b5cf 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -3,10 +3,9 @@
 
 #define SHA_DIGEST_WORDS 5
 #define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
-#define SHA_WORKSPACE_WORDS 16
 
 void sha_init(__u32 *buf);
-void sha_transform(__u32 *digest, const char *data, __u32 *W);
+void sha_transform(__u32 *digest, const void *data, bool wipe);
 
 #define MD5_DIGEST_WORDS 4
 #define MD5_MESSAGE_BYTES 64
diff --git a/lib/sha1.c b/lib/sha1.c
index f33271d..a78ca29 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/cryptohash.h>
 #include <asm/unaligned.h>
 
 /*
@@ -41,45 +42,66 @@
 #endif
 
 /* This "rolls" over the 512-bit array */
-#define W(x) (array[(x)&15])
+#define W(x) (workspace.array[(x)&15])
 
 /*
  * Where do we get the source from? The first 16 iterations get it from
  * the input data, the next mix it from the 512-bit array.
  */
-#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
+#define SHA_SRC(t) (workspace.aligned_data[t])
 #define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
 
-#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
-	__u32 TEMP = input(t); setW(t, TEMP); \
-	E += TEMP + rol32(A,5) + (fn) + (constant); \
-	B = ror32(B, 2); } while (0)
-
-#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
-#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
-#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
-#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
-#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) ,  0xca62c1d6, A, B, C, D, E )
+#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E)	\
+do {								\
+	__u32 TEMP = input(t);					\
+								\
+	setW(t, TEMP);						\
+	E += TEMP + rol32(A, 5) + (fn) + (constant);		\
+	B = ror32(B, 2);					\
+} while (0)
+
+#define T_0_15(t, A, B, C, D, E)					\
+	SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D), 0x5a827999, A, B, C, D, E)
+#define T_16_19(t, A, B, C, D, E)					\
+	SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D), 0x5a827999, A, B, C, D, E)
+#define T_20_39(t, A, B, C, D, E)					\
+	SHA_ROUND(t, SHA_MIX, (B^C^D), 0x6ed9eba1, A, B, C, D, E)
+#define T_40_59(t, A, B, C, D, E)					\
+	SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))), 0x8f1bbcdc, A, B, C, D, E)
+#define T_60_79(t, A, B, C, D, E)					\
+	SHA_ROUND(t, SHA_MIX, (B^C^D), 0xca62c1d6, A, B, C, D, E)
 
 /**
  * sha_transform - single block SHA1 transform
  *
  * @digest: 160 bit digest to update
  * @data:   512 bits of data to hash
- * @array:  16 words of workspace (see note)
+ * @wipe:   true if the hash is security sensitive
  *
  * This function generates a SHA1 digest for a single 512-bit block.
  * Be warned, it does not handle padding and message digest, do not
  * confuse it with the full FIPS 180-1 digest algorithm for variable
  * length messages.
- *
- * Note: If the hash is security sensitive, the caller should be sure
- * to clear the workspace. This is left to the caller to avoid
- * unnecessary clears between chained hashing operations.
  */
-void sha_transform(__u32 *digest, const char *data, __u32 *array)
+void sha_transform(__u32 *digest, const void *data, bool wipe)
 {
 	__u32 A, B, C, D, E;
+	struct {
+		__u32 array[16];	/* working array */
+		__u32 aligned[16];	/* u32 aligned version of data */
+		const __u32 *aligned_data;	/* either data or aligned */
+	} workspace;
+	size_t wipe_size;
+
+	if (((unsigned long)data) & 3) {	/* unaligned word accesses */
+		workspace.aligned_data =
+			memcpy(workspace.aligned, data,
+			       sizeof(workspace.aligned));
+		wipe_size = sizeof(workspace);
+	} else {
+		workspace.aligned_data = data;
+		wipe_size = sizeof(workspace.array);
+	}
 
 	A = digest[0];
 	B = digest[1];
@@ -182,6 +204,9 @@ void sha_transform(__u32 *digest, const char *data, __u32 *array)
 	digest[2] += C;
 	digest[3] += D;
 	digest[4] += E;
+
+	if (wipe)
+		memset(&workspace, 0, wipe_size);
 }
 EXPORT_SYMBOL(sha_transform);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 92bb943..8f429cd 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -37,8 +37,7 @@ __initcall(init_syncookies);
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
-		      ipv4_cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5], ipv4_cookie_scratch);
 
 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 		       u32 count, int c)
@@ -50,7 +49,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 	tmp[1] = (__force u32)daddr;
 	tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
 	tmp[3] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+	sha_transform(tmp + 16, tmp, false);
 
 	return tmp[17];
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0..454ed67 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2494,7 +2494,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		}
 
 		if (opts.hash_size > 0) {
-			__u32 workspace[SHA_WORKSPACE_WORDS];
 			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
 			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
 
@@ -2510,9 +2509,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
 			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
 
-			sha_transform((__u32 *)&xvp->cookie_bakery[0],
-				      (char *)mess,
-				      &workspace[0]);
+			sha_transform((__u32 *)&xvp->cookie_bakery[0], mess,
+				      false);
 			opts.hash_location =
 				(__u8 *)&xvp->cookie_bakery[0];
 		}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 89d5bf8..90823e0 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -63,8 +63,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	return child;
 }
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
-		      ipv6_cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5], ipv6_cookie_scratch);
 
 static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
 		       __be16 sport, __be16 dport, u32 count, int c)
@@ -81,7 +80,7 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd
 	memcpy(tmp + 4, daddr, 16);
 	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
 	tmp[9] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+	sha_transform(tmp + 16, tmp, false);
 
 	return tmp[17];
 }
-- 
1.7.6.405.gc1be0



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists