lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 03 May 2016 10:50:25 +0200
From:	Stephan Mueller <smueller@...onox.de>
To:	Theodore Ts'o <tytso@....edu>
Cc:	linux-kernel@...r.kernel.org, herbert@...dor.apana.org.au,
	andi@...stfloor.org, sandyinchina@...il.com,
	cryptography@...edaemon.net, jsd@...n.com, hpa@...or.com,
	linux-crypto@...r.kernel.org
Subject: Re: [PATCH 1/3] random: replace non-blocking pool with a Chacha20-based CRNG

Am Montag, 2. Mai 2016, 02:26:51 schrieb Theodore Ts'o:

Hi Theodore,

> The CRNG is faster, and we don't pretend to track entropy usage in the
> CRNG any more.

In general, I have no concerns with this approach either. And thank you for
addressing some of my concerns.

There are a few more concerns left open. I would suggest that I write them up
together with a proposal on how to address them.

Some comments inline:
> 
> Signed-off-by: Theodore Ts'o <tytso@....edu>
> ---
>  crypto/chacha20_generic.c |  61 ----------
>  drivers/char/random.c     | 282
> ++++++++++++++++++++++++++++++++++------------ include/crypto/chacha20.h | 
>  1 +
>  lib/Makefile              |   2 +-
>  lib/chacha20.c            |  79 +++++++++++++
>  5 files changed, 294 insertions(+), 131 deletions(-)
>  create mode 100644 lib/chacha20.c
> 
> diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
> index da9c899..1cab831 100644
> --- a/crypto/chacha20_generic.c
> +++ b/crypto/chacha20_generic.c
> @@ -15,72 +15,11 @@
>  #include <linux/module.h>
>  #include <crypto/chacha20.h>
> 
> -static inline u32 rotl32(u32 v, u8 n)
> -{
> -	return (v << n) | (v >> (sizeof(v) * 8 - n));
> -}
> -
>  static inline u32 le32_to_cpuvp(const void *p)
>  {
>  	return le32_to_cpup(p);
>  }
> 
> -static void chacha20_block(u32 *state, void *stream)
> -{
> -	u32 x[16], *out = stream;
> -	int i;
> -
> -	for (i = 0; i < ARRAY_SIZE(x); i++)
> -		x[i] = state[i];
> -
> -	for (i = 0; i < 20; i += 2) {
> -		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
> -		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
> -		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
> -		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
> -
> -		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
> -		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
> -		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
> -		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
> -
> -		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
> -		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
> -		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
> -		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
> -
> -		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
> -		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
> -		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
> -		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
> -
> -		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
> -		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
> -		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
> -		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
> -
> -		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
> -		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
> -		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
> -		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
> -
> -		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
> -		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
> -		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
> -		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
> -
> -		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
> -		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
> -		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
> -		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(x); i++)
> -		out[i] = cpu_to_le32(x[i] + state[i]);
> -
> -	state[12]++;
> -}
> -
>  static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
>  			     unsigned int bytes)
>  {
> diff --git a/drivers/char/random.c b/drivers/char/random.c
> index b583e53..95f4451 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -260,6 +260,7 @@
>  #include <linux/irq.h>
>  #include <linux/syscalls.h>
>  #include <linux/completion.h>
> +#include <crypto/chacha20.h>
> 
>  #include <asm/processor.h>
>  #include <asm/uaccess.h>
> @@ -412,6 +413,15 @@ static struct fasync_struct *fasync;
>  static DEFINE_SPINLOCK(random_ready_list_lock);
>  static LIST_HEAD(random_ready_list);
> 
> +/*
> + * crng_init =  0 --> Uninitialized
> + *		2 --> Initialized
> + *		3 --> Initialized from input_pool
> + */
> +static int crng_init = 0;

Shouldn't that be an atomic_t?

> +#define crng_ready() (likely(crng_init >= 2))
> +static void process_random_ready_list(void);
> +
>  /**********************************************************************
>   *
>   * OS independent entropy store.   Here are the functions which handle
> @@ -441,10 +451,13 @@ struct entropy_store {
>  	__u8 last_data[EXTRACT_SIZE];
>  };
> 
> +static ssize_t extract_entropy(struct entropy_store *r, void *buf,
> +			       size_t nbytes, int min, int rsvd);
> +
> +static int crng_reseed(struct entropy_store *r);
>  static void push_to_pool(struct work_struct *work);
>  static __u32 input_pool_data[INPUT_POOL_WORDS];
>  static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
> -static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
> 
>  static struct entropy_store input_pool = {
>  	.poolinfo = &poolinfo_table[0],
> @@ -465,16 +478,6 @@ static struct entropy_store blocking_pool = {
>  					push_to_pool),
>  };
> 
> -static struct entropy_store nonblocking_pool = {
> -	.poolinfo = &poolinfo_table[1],
> -	.name = "nonblocking",
> -	.pull = &input_pool,
> -	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
> -	.pool = nonblocking_pool_data,
> -	.push_work = __WORK_INITIALIZER(nonblocking_pool.push_work,
> -					push_to_pool),
> -};
> -
>  static __u32 const twist_table[8] = {
>  	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
>  	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
> @@ -677,12 +680,6 @@ retry:
>  	if (!r->initialized && r->entropy_total > 128) {
>  		r->initialized = 1;
>  		r->entropy_total = 0;
> -		if (r == &nonblocking_pool) {
> -			prandom_reseed_late();
> -			process_random_ready_list();
> -			wake_up_all(&urandom_init_wait);
> -			pr_notice("random: %s pool is initialized\n", r-
>name);
> -		}
>  	}
> 
>  	trace_credit_entropy_bits(r->name, nbits,
> @@ -692,30 +689,27 @@ retry:
>  	if (r == &input_pool) {
>  		int entropy_bits = entropy_count >> ENTROPY_SHIFT;
> 
> +		if (crng_init < 3 && entropy_bits >= 128) {
> +			(void) crng_reseed(r);
> +			entropy_bits = r->entropy_count >> ENTROPY_SHIFT;
> +		}
> +
>  		/* should we wake readers? */
>  		if (entropy_bits >= random_read_wakeup_bits) {
>  			wake_up_interruptible(&random_read_wait);
>  			kill_fasync(&fasync, SIGIO, POLL_IN);
>  		}
>  		/* If the input pool is getting full, send some
> -		 * entropy to the two output pools, flipping back and
> -		 * forth between them, until the output pools are 75%
> -		 * full.
> +		 * entropy to the blocking pool until it is 75% full.
>  		 */
>  		if (entropy_bits > random_write_wakeup_bits &&
>  		    r->initialized &&
>  		    r->entropy_total >= 2*random_read_wakeup_bits) {
> -			static struct entropy_store *last = &blocking_pool;
>  			struct entropy_store *other = &blocking_pool;
> 
> -			if (last == &blocking_pool)
> -				other = &nonblocking_pool;
>  			if (other->entropy_count <=
> -			    3 * other->poolinfo->poolfracbits / 4)
> -				last = other;
> -			if (last->entropy_count <=
> -			    3 * last->poolinfo->poolfracbits / 4) {
> -				schedule_work(&last->push_work);
> +			    3 * other->poolinfo->poolfracbits / 4) {
> +				schedule_work(&other->push_work);
>  				r->entropy_total = 0;
>  			}
>  		}
> @@ -735,6 +729,158 @@ static void credit_entropy_bits_safe(struct
> entropy_store *r, int nbits)
> 
>  /*********************************************************************
>   *
> + * CRNG using CHACHA20
> + *
> + *********************************************************************/
> +
> +#define CRNG_RESEED_INTERVAL (300*HZ)
> +
> +struct crng_state {
> +	__u32		state[16];
> +	unsigned long	init_time;
> +	spinlock_t	lock;
> +};
> +
> +struct crng_state primary_crng = {
> +	.lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
> +};
> +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
> +
> +static void _initialize_crng(struct crng_state *crng)
> +{
> +	int		i;
> +	unsigned long	rv;

Why do you use unsigned long here? I thought the state[i] is unsigned int.
> +
> +	memcpy(&crng->state[0], "expand 32-byte k", 16);
> +	for (i = 4; i < 16; i++) {
> +		if (!arch_get_random_seed_long(&rv) &&
> +		    !arch_get_random_long(&rv))
> +			rv = random_get_entropy();
> +		crng->state[i] ^= rv;
> +	}
> +	crng->init_time = jiffies - CRNG_RESEED_INTERVAL;

Would it make sense to add the ChaCha20 self test vectors from RFC7539 here to 
test that the ChaCha20 works?

> +}
> +
> +static void initialize_crng(struct crng_state *crng)
> +{
> +	_initialize_crng(crng);
> +	spin_lock_init(&crng->lock);
> +}
> +
> +static int crng_fast_load(__u32 pool[4])
> +{
> +	int	i;
> +	__u32	*p;
> +
> +	if (!spin_trylock(&primary_crng.lock))
> +		return 0;
> +	if (crng_ready()) {
> +		spin_unlock(&primary_crng.lock);
> +		return 0;
> +	}
> +	p = &primary_crng.state[4];
> +	if (crng_init == 1)
> +		p += 4;
> +	for (i=0; i < 4; i++)
> +		*p ^= pool[i];
> +	if (crng_init++ >= 2)
> +		wake_up_interruptible(&crng_init_wait);

Don't we have a race here with the crng_init < 3 check in crng_reseed 
considering multi-core systems?

> +	pr_notice("random: crng_init %d\n", crng_init);
> +	spin_unlock(&primary_crng.lock);
> +	return 1;
> +}
> +
> +/* Returns 1 on success */
> +static int crng_reseed(struct entropy_store *r)
> +{
> +	unsigned long	flags;
> +	int		ret = 0;
> +	int		i, num, num_words;
> +	__u32		tmp[16];
> +
> +	spin_lock_irqsave(&primary_crng.lock, flags);
> +	num = extract_entropy(r, tmp, 32, 16, 0);
> +	if (num == 0)
> +		goto out;
> +	if (num < 16 || num > 32) {
> +		WARN_ON(1);
> +		pr_err("crng_reseed: num is %d?!?\n", num);
> +	}
> +	num_words = (num + 3) / 4;
> +	for (i = 0; i < num_words; i++)
> +		primary_crng.state[i+4] ^= tmp[i];
> +	primary_crng.init_time = jiffies;
> +	if (crng_init < 3) {

Shouldn't that one be if (crng_init < 3 && num >= 16) ?

> +		crng_init = 3;
> +		process_random_ready_list();
> +		wake_up_interruptible(&crng_init_wait);
> +		pr_notice("random: crng_init 3\n");

Would it make sense to be more descriptive here to allow readers of dmesg to 
understand the output?

> +	}
> +	ret = 1;
> +out:
> +	spin_unlock_irqrestore(&primary_crng.lock, flags);

memzero_explicit of tmp?

> +	return ret;
> +}
> +
> +static inline void crng_wait_ready(void)
> +{
> +	wait_event_interruptible(crng_init_wait, crng_ready());
> +}
> +
> +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
> +{
> +	unsigned long v, flags;
> +	struct crng_state *crng = &primary_crng;
> +
> +	if (crng_init > 2 &&
> +	    time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
> +		crng_reseed(&input_pool);
> +	spin_lock_irqsave(&crng->lock, flags);
> +	if (arch_get_random_long(&v))
> +		crng->state[14] ^= v;

Again, unsigned int?

What is the purpose of covering only the second 32-bit word of the nonce with
arch_get_random?

> +	chacha20_block(&crng->state[0], out);
> +	if (crng->state[12] == 0)
> +		crng->state[13]++;

state[12]++? Or why do you increment the nonce?

> +	spin_unlock_irqrestore(&crng->lock, flags);
> +}
> +
> +static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
> +{
> +	ssize_t ret = 0, i;
> +	__u8 tmp[CHACHA20_BLOCK_SIZE];
> +	int large_request = (nbytes > 256);
> +
> +	while (nbytes) {
> +		if (large_request && need_resched()) {
> +			if (signal_pending(current)) {
> +				if (ret == 0)
> +					ret = -ERESTARTSYS;
> +				break;
> +			}
> +			schedule();
> +		}
> +
> +		extract_crng(tmp);

Now I have to wear my (ugly) FIPS hat: we need that code from the current
implementation here:

                if (fips_enabled) {
                        spin_lock_irqsave(&r->lock, flags);
                        if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
                                panic("Hardware RNG duplicated output!\n");
                        memcpy(r->last_data, tmp, EXTRACT_SIZE);
                        spin_unlock_irqrestore(&r->lock, flags);
                }


> +		i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE);
> +		if (copy_to_user(buf, tmp, i)) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		nbytes -= i;
> +		buf += i;
> +		ret += i;
> +	}
> +
> +	/* Wipe data just written to memory */
> +	memzero_explicit(tmp, sizeof(tmp));
> +
> +	return ret;
> +}
> +
> +
> +/*********************************************************************
> + *
>   * Entropy input management
>   *
>   *********************************************************************/
> @@ -749,12 +895,12 @@ struct timer_rand_state {
>  #define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
> 
>  /*
> - * Add device- or boot-specific data to the input and nonblocking
> - * pools to help initialize them to unique values.
> + * Add device- or boot-specific data to the input pool to help
> + * initialize it.
>   *
> - * None of this adds any entropy, it is meant to avoid the
> - * problem of the nonblocking pool having similar initial state
> - * across largely identical devices.
> + * None of this adds any entropy; it is meant to avoid the problem of
> + * the entropy pool having similar initial state across largely
> + * identical devices.
>   */
>  void add_device_randomness(const void *buf, unsigned int size)
>  {
> @@ -766,11 +912,6 @@ void add_device_randomness(const void *buf, unsigned
> int size) _mix_pool_bytes(&input_pool, buf, size);
>  	_mix_pool_bytes(&input_pool, &time, sizeof(time));
>  	spin_unlock_irqrestore(&input_pool.lock, flags);
> -
> -	spin_lock_irqsave(&nonblocking_pool.lock, flags);
> -	_mix_pool_bytes(&nonblocking_pool, buf, size);
> -	_mix_pool_bytes(&nonblocking_pool, &time, sizeof(time));
> -	spin_unlock_irqrestore(&nonblocking_pool.lock, flags);
>  }
>  EXPORT_SYMBOL(add_device_randomness);
> 
> @@ -801,7 +942,7 @@ static void add_timer_randomness(struct timer_rand_state
> *state, unsigned num) sample.jiffies = jiffies;
>  	sample.cycles = random_get_entropy();
>  	sample.num = num;
> -	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
> +	r = &input_pool;
>  	mix_pool_bytes(r, &sample, sizeof(sample));
> 
>  	/*
> @@ -921,7 +1062,13 @@ void add_interrupt_randomness(int irq, int irq_flags)
>  	    !time_after(now, fast_pool->last + HZ))
>  		return;
> 
> -	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
> +	if (!crng_ready() && crng_fast_load(fast_pool->pool)) {
> +		fast_pool->count = 0;
> +		fast_pool->last = now;
> +		return;
> +	}
> +
> +	r = &input_pool;
>  	if (!spin_trylock(&r->lock))
>  		return;
> 
> @@ -964,9 +1111,6 @@ EXPORT_SYMBOL_GPL(add_disk_randomness);
>   *
>   *********************************************************************/
> 
> -static ssize_t extract_entropy(struct entropy_store *r, void *buf,
> -			       size_t nbytes, int min, int rsvd);
> -
>  /*
>   * This utility inline function is responsible for transferring entropy
>   * from the primary pool to the secondary extraction pool. We make
> @@ -1252,15 +1396,26 @@ static ssize_t extract_entropy_user(struct
> entropy_store *r, void __user *buf, */
>  void get_random_bytes(void *buf, int nbytes)
>  {
> +	__u8 tmp[CHACHA20_BLOCK_SIZE];
> +
>  #if DEBUG_RANDOM_BOOT > 0
> -	if (unlikely(nonblocking_pool.initialized == 0))
> +	if (!crng_ready())
>  		printk(KERN_NOTICE "random: %pF get_random_bytes called "
> -		       "with %d bits of entropy available\n",
> -		       (void *) _RET_IP_,
> -		       nonblocking_pool.entropy_total);
> +		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
>  #endif
>  	trace_get_random_bytes(nbytes, _RET_IP_);
> -	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
> +
> +	while (nbytes >= CHACHA20_BLOCK_SIZE) {
> +		extract_crng(buf);
> +		buf += CHACHA20_BLOCK_SIZE;
> +		nbytes -= CHACHA20_BLOCK_SIZE;
> +	}
> +
> +	if (nbytes > 0) {
> +		extract_crng(tmp);
> +		memcpy(buf, tmp, nbytes);
> +		memzero_explicit(tmp, nbytes);
> +	}
>  }
>  EXPORT_SYMBOL(get_random_bytes);
> 
> @@ -1278,7 +1433,7 @@ int add_random_ready_callback(struct
> random_ready_callback *rdy) unsigned long flags;
>  	int err = -EALREADY;
> 
> -	if (likely(nonblocking_pool.initialized))
> +	if (crng_ready())
>  		return err;
> 
>  	owner = rdy->owner;
> @@ -1286,7 +1441,7 @@ int add_random_ready_callback(struct
> random_ready_callback *rdy) return -ENOENT;
> 
>  	spin_lock_irqsave(&random_ready_list_lock, flags);
> -	if (nonblocking_pool.initialized)
> +	if (crng_ready())
>  		goto out;
> 
>  	owner = NULL;
> @@ -1350,7 +1505,7 @@ void get_random_bytes_arch(void *buf, int nbytes)
>  	}
> 
>  	if (nbytes)
> -		extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
> +		get_random_bytes(p, nbytes);
>  }
>  EXPORT_SYMBOL(get_random_bytes_arch);
> 
> @@ -1395,7 +1550,7 @@ static int rand_initialize(void)
>  {
>  	init_std_data(&input_pool);
>  	init_std_data(&blocking_pool);
> -	init_std_data(&nonblocking_pool);
> +	_initialize_crng(&primary_crng);
>  	return 0;
>  }
>  early_initcall(rand_initialize);
> @@ -1459,16 +1614,10 @@ urandom_read(struct file *file, char __user *buf,
> size_t nbytes, loff_t *ppos) {
>  	int ret;
> 
> -	if (unlikely(nonblocking_pool.initialized == 0))
> -		printk_once(KERN_NOTICE "random: %s urandom read "
> -			    "with %d bits of entropy available\n",
> -			    current->comm, nonblocking_pool.entropy_total);
> -
> +	crng_wait_ready();

Just for clarification: are you now blocking /dev/urandom until the CRNG is 
filled? That would be a big win.

>  	nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
> -	ret = extract_entropy_user(&nonblocking_pool, buf, nbytes);
> -
> -	trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool),
> -			   ENTROPY_BITS(&input_pool));
> +	ret = extract_crng_user(buf, nbytes);
> +	trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool));
>  	return ret;
>  }
> 
> @@ -1514,10 +1663,7 @@ static ssize_t random_write(struct file *file, const
> char __user *buffer, {
>  	size_t ret;
> 
> -	ret = write_pool(&blocking_pool, buffer, count);
> -	if (ret)
> -		return ret;
> -	ret = write_pool(&nonblocking_pool, buffer, count);
> +	ret = write_pool(&input_pool, buffer, count);
>  	if (ret)
>  		return ret;
> 
> @@ -1568,7 +1714,6 @@ static long random_ioctl(struct file *f, unsigned int
> cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN))
>  			return -EPERM;
>  		input_pool.entropy_count = 0;
> -		nonblocking_pool.entropy_count = 0;
>  		blocking_pool.entropy_count = 0;
>  		return 0;
>  	default:
> @@ -1610,11 +1755,10 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf,
> size_t, count, if (flags & GRND_RANDOM)
>  		return _random_read(flags & GRND_NONBLOCK, buf, count);
> 
> -	if (unlikely(nonblocking_pool.initialized == 0)) {
> +	if (!crng_ready()) {
>  		if (flags & GRND_NONBLOCK)
>  			return -EAGAIN;
> -		wait_event_interruptible(urandom_init_wait,
> -					 nonblocking_pool.initialized);
> +		crng_wait_ready();
>  		if (signal_pending(current))
>  			return -ERESTARTSYS;
>  	}
> diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
> index 274bbae..20d20f68 100644
> --- a/include/crypto/chacha20.h
> +++ b/include/crypto/chacha20.h
> @@ -16,6 +16,7 @@ struct chacha20_ctx {
>  	u32 key[8];
>  };
> 
> +void chacha20_block(u32 *state, void *stream);
>  void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
>  int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
>  			   unsigned int keysize);
> diff --git a/lib/Makefile b/lib/Makefile
> index 7bd6fd4..9ba27cd 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -22,7 +22,7 @@ KCOV_INSTRUMENT_hweight.o := n
>  lib-y := ctype.o string.o vsprintf.o cmdline.o \
>  	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
>  	 idr.o int_sqrt.o extable.o \
> -	 sha1.o md5.o irq_regs.o argv_split.o \
> +	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
>  	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
>  	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
>  	 earlycpio.o seq_buf.o nmi_backtrace.o
> diff --git a/lib/chacha20.c b/lib/chacha20.c
> new file mode 100644
> index 0000000..250ceed
> --- /dev/null
> +++ b/lib/chacha20.c
> @@ -0,0 +1,79 @@
> +/*
> + * ChaCha20 256-bit cipher algorithm, RFC7539
> + *
> + * Copyright (C) 2015 Martin Willi
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <linux/bitops.h>
> +#include <linux/cryptohash.h>
> +#include <asm/unaligned.h>
> +#include <crypto/chacha20.h>
> +
> +static inline u32 rotl32(u32 v, u8 n)
> +{
> +	return (v << n) | (v >> (sizeof(v) * 8 - n));
> +}
> +
> +extern void chacha20_block(u32 *state, void *stream)
> +{
> +	u32 x[16], *out = stream;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(x); i++)
> +		x[i] = state[i];
> +
> +	for (i = 0; i < 20; i += 2) {
> +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
> +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
> +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
> +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
> +
> +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
> +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
> +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
> +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
> +
> +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
> +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
> +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
> +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
> +
> +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
> +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
> +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
> +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
> +
> +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
> +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
> +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
> +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
> +
> +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
> +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
> +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
> +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
> +
> +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
> +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
> +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
> +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
> +
> +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
> +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
> +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
> +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(x); i++)
> +		out[i] = cpu_to_le32(x[i] + state[i]);
> +
> +	state[12]++;
> +}
> +EXPORT_SYMBOL(chacha20_block);


Ciao
Stephan

Powered by blists - more mailing lists