extract_buf() allocates a temporary buffer, copies a prefix of it to a
caller-provided temporary buffer, then wipes its own copy.  By just
having the caller allocate the full size, extract_buf() can work
in-place, saving stack space, copying, and wiping.

The FIPS initialization in extract_entropy() required some rejiggering,
since that would have allocated a second (enlarged) buffer on the stack
in addition to the one in _extract_entropy().

I had hoped there would be a code size reduction, but it's +80 bytes. :-(
I guess the extra indirections in extract_buf() more than make up for
the rest.

   text	   data	    bss	    dec	    hex	filename
  17783	   1405	    864	  20052	   4e54	drivers/char/random.o.old
  17863	   1405	    864	  20132	   4ea4	drivers/char/random.o.new

Signed-off-by: George Spelvin
---
I started working on the idea in my previous e-mail, and spotted this
cleanup.  Only compile-tested so far; I don't want to stop to reboot.

 drivers/char/random.c | 105 +++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 53 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 02e80000310c..38bb80a9efa2 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -357,11 +357,26 @@
 #define OUTPUT_POOL_SHIFT	10
 #define OUTPUT_POOL_WORDS	(1 << (OUTPUT_POOL_SHIFT-5))
 #define SEC_XFER_SIZE		512
-#define EXTRACT_SIZE		10
+#define SHA_DIGEST_BYTES	(SHA_DIGEST_WORDS * 4)
+#define EXTRACT_SIZE		(SHA_DIGEST_BYTES / 2)
 
 
 #define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
 
+/*
+ * This buffer is used by the extract_buf function, and includes
+ * additional working space it needs.  By having the caller
+ * allocate it, we save stack space, copying, and wiping overhead.
+ */
+union extract_buf {
+	__u8 b[EXTRACT_SIZE];
+	struct {
+		__u32 w[SHA_DIGEST_WORDS];
+		__u32 workspace[SHA_WORKSPACE_WORDS];
+	};
+	unsigned long l[LONGS(SHA_DIGEST_BYTES)];
+};
+
 /*
  * To allow fractional bits to be tracked, the entropy_count field is
  * denominated in units of 1/16 bits.
@@ -1548,34 +1563,29 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
  * This function does the actual extraction for extract_entropy and
  * extract_entropy_user.
  *
- * Note: we assume that .poolwords is a multiple of 16 words.
+ * Note: we assume that .poolbytes is a multiple of SHA_MESSAGE_BYTES = 64.
  */
-static void extract_buf(struct entropy_store *r, __u8 *out)
+static void extract_buf(struct entropy_store *r, union extract_buf *out)
 {
 	int i;
-	union {
-		__u32 w[5];
-		unsigned long l[LONGS(20)];
-	} hash;
-	__u32 workspace[SHA_WORKSPACE_WORDS];
 	unsigned long flags;
 
 	/*
 	 * If we have an architectural hardware random number
 	 * generator, use it for SHA's initial vector
 	 */
-	sha_init(hash.w);
-	for (i = 0; i < LONGS(20); i++) {
+	sha_init(out->w);
+	for (i = 0; i < ARRAY_SIZE(out->l); i++) {
 		unsigned long v;
 		if (!arch_get_random_long(&v))
 			break;
-		hash.l[i] = v;
+		out->l[i] = v;
 	}
 
-	/* Generate a hash across the pool, 16 words (512 bits) at a time */
+	/* Generate a hash across the pool, 64 bytes (512 bits) at a time */
 	spin_lock_irqsave(&r->lock, flags);
-	for (i = 0; i < r->poolinfo->poolwords; i += 16)
-		sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
+	for (i = 0; i < r->poolinfo->poolbytes; i += SHA_MESSAGE_BYTES)
+		sha_transform(out->w, (__u8 *)r->pool + i, out->workspace);
 
 	/*
 	 * We mix the hash back into the pool to prevent backtracking
@@ -1586,50 +1596,50 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
 	 * brute-forcing the feedback as hard as brute-forcing the
 	 * hash.
 	 */
-	__mix_pool_bytes(r, hash.w, sizeof(hash.w));
+	__mix_pool_bytes(r, out->w, sizeof(out->w));
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	memzero_explicit(workspace, sizeof(workspace));
-
 	/*
 	 * In case the hash function has some recognizable output
 	 * pattern, we fold it in half. Thus, we always feed back
 	 * twice as much data as we output.
 	 */
-	hash.w[0] ^= hash.w[3];
-	hash.w[1] ^= hash.w[4];
-	hash.w[2] ^= rol32(hash.w[2], 16);
-
-	memcpy(out, &hash, EXTRACT_SIZE);
-	memzero_explicit(&hash, sizeof(hash));
+	out->w[0] ^= out->w[3];
+	out->w[1] ^= out->w[4];
+	out->w[2] ^= rol32(out->w[2], 16);
 }
 
 static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
 				size_t nbytes, int fips)
 {
-	ssize_t ret = 0, i;
-	__u8 tmp[EXTRACT_SIZE];
-	unsigned long flags;
+	ssize_t ret = 0;
+	union extract_buf tmp;
 
 	while (nbytes) {
-		extract_buf(r, tmp);
+		ssize_t i;
+
+		extract_buf(r, &tmp);
 
 		if (fips) {
+			unsigned long flags;
+
 			spin_lock_irqsave(&r->lock, flags);
-			if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
+			if (unlikely(!r->last_data_init))
+				r->last_data_init = 1;
+			else if (!memcmp(tmp.b, r->last_data, EXTRACT_SIZE))
 				panic("Hardware RNG duplicated output!\n");
-			memcpy(r->last_data, tmp, EXTRACT_SIZE);
+			memcpy(r->last_data, tmp.b, EXTRACT_SIZE);
 			spin_unlock_irqrestore(&r->lock, flags);
 		}
 		i = min_t(int, nbytes, EXTRACT_SIZE);
-		memcpy(buf, tmp, i);
+		memcpy(buf, tmp.b, i);
 		nbytes -= i;
 		buf += i;
 		ret += i;
 	}
 
 	/* Wipe data just returned from memory */
-	memzero_explicit(tmp, sizeof(tmp));
+	memzero_explicit(&tmp, sizeof(tmp));
 
 	return ret;
 }
@@ -1646,24 +1656,13 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
 static ssize_t extract_entropy(struct entropy_store *r, void *buf,
 				 size_t nbytes, int min, int reserved)
 {
-	__u8 tmp[EXTRACT_SIZE];
-	unsigned long flags;
-
-	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
-	if (fips_enabled) {
-		spin_lock_irqsave(&r->lock, flags);
-		if (!r->last_data_init) {
-			r->last_data_init = 1;
-			spin_unlock_irqrestore(&r->lock, flags);
-			trace_extract_entropy(r->name, EXTRACT_SIZE,
-					      ENTROPY_BITS(r), _RET_IP_);
-			xfer_secondary_pool(r, EXTRACT_SIZE);
-			extract_buf(r, tmp);
-			spin_lock_irqsave(&r->lock, flags);
-			memcpy(r->last_data, tmp, EXTRACT_SIZE);
-		}
-		spin_unlock_irqrestore(&r->lock, flags);
-	}
+	/*
+	 * If last_data isn't primed, prime it.  We don't lock for the
+	 * check, so there's a tiny chance two CPUs race and do this
+	 * redundantly, but that's harmless.
+	 */
+	if (fips_enabled && unlikely(!r->last_data_init))
+		_extract_entropy(r, buf, 1, fips_enabled);
 
 	trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
 	xfer_secondary_pool(r, nbytes);
@@ -1680,7 +1679,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 				    size_t nbytes)
 {
 	ssize_t ret = 0, i;
-	__u8 tmp[EXTRACT_SIZE];
+	union extract_buf tmp;
 	int large_request = (nbytes > 256);
 
 	trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
@@ -1702,9 +1701,9 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 			schedule();
 		}
 
-		extract_buf(r, tmp);
+		extract_buf(r, &tmp);
 		i = min_t(int, nbytes, EXTRACT_SIZE);
-		if (copy_to_user(buf, tmp, i)) {
+		if (copy_to_user(buf, tmp.b, i)) {
 			ret = -EFAULT;
 			break;
 		}
@@ -1715,7 +1714,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 	}
 
 	/* Wipe data just returned from memory */
-	memzero_explicit(tmp, sizeof(tmp));
+	memzero_explicit(&tmp, sizeof(tmp));
 
 	return ret;
 }
-- 
2.26.0
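
For anyone who wants to play with the idea outside the kernel tree, here
is a minimal, self-contained userspace sketch of the same pattern: the
caller allocates one union that doubles as output buffer, digest, and
hash workspace, so the extraction routine works in place and only the
caller wipes it, once.  The names toy_pool, toy_transform() and
toy_extract_buf() are hypothetical stand-ins for entropy_store,
sha_transform() and extract_buf(); this illustrates the buffer layout
only, it is not the patched kernel code.

/*
 * Illustrative sketch only -- not part of the patch.  A userspace model
 * of the caller-allocated, in-place buffer pattern used above.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define DIGEST_WORDS	5			/* stands in for SHA_DIGEST_WORDS */
#define WORKSPACE_WORDS	16			/* stands in for SHA_WORKSPACE_WORDS */
#define EXTRACT_SIZE	(DIGEST_WORDS * 4 / 2)	/* folded half of the digest */
#define POOL_WORDS	32

/* One caller-allocated object: output bytes, digest, and scratch space. */
union extract_buf {
	uint8_t  b[EXTRACT_SIZE];
	struct {
		uint32_t w[DIGEST_WORDS];
		uint32_t workspace[WORKSPACE_WORDS];
	};
};

struct toy_pool {
	uint32_t pool[POOL_WORDS];
};

/* Trivial stand-in for sha_transform(): mixes one 64-byte block into the digest. */
static void toy_transform(uint32_t digest[DIGEST_WORDS],
			  const uint8_t *block, uint32_t *workspace)
{
	memcpy(workspace, block, 16 * sizeof(uint32_t));
	for (int i = 0; i < 16; i++)
		digest[i % DIGEST_WORDS] ^= workspace[i] * 2654435761u + i;
}

/*
 * Works entirely inside the caller's union: the digest accumulates in
 * out->w, scratch lives in out->workspace, and the fold leaves the final
 * EXTRACT_SIZE bytes in out->b.  No local copy, so nothing to wipe here;
 * the caller wipes the whole union once when it is finished.
 */
static void toy_extract_buf(const struct toy_pool *p, union extract_buf *out)
{
	memset(out->w, 0, sizeof(out->w));
	for (int i = 0; i < POOL_WORDS; i += 16)
		toy_transform(out->w, (const uint8_t *)(p->pool + i),
			      out->workspace);

	/* Fold the digest in half so the raw hash is never exposed. */
	out->w[0] ^= out->w[3];
	out->w[1] ^= out->w[4];
	out->w[2] ^= (out->w[2] << 16) | (out->w[2] >> 16);	/* rol32(w[2], 16) */
}

int main(void)
{
	struct toy_pool p = { .pool = { 1, 2, 3 } };
	union extract_buf tmp;	/* one allocation serves the whole loop */

	toy_extract_buf(&p, &tmp);
	for (int i = 0; i < EXTRACT_SIZE; i++)
		printf("%02x", tmp.b[i]);
	putchar('\n');

	/* The kernel uses memzero_explicit() so the wipe can't be optimized away. */
	memset(&tmp, 0, sizeof(tmp));
	return 0;
}

The point mirrored here is only the allocation and wiping discipline:
one union on the caller's stack, no intermediate copy in the callee, and
a single wipe at the end.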