Date:   Mon, 10 Aug 2020 23:04:55 +0200
From:   Willy Tarreau <w@....eu>
To:     Linus Torvalds <torvalds@...ux-foundation.org>,
        George Spelvin <lkml@....org>, Florian Westphal <fw@...len.de>
Cc:     Netdev <netdev@...r.kernel.org>, Amit Klein <aksecurity@...il.com>,
        Eric Dumazet <edumazet@...gle.com>,
        "Jason A. Donenfeld" <Jason@...c4.com>,
        Andrew Lutomirski <luto@...nel.org>,
        Kees Cook <keescook@...omium.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Peter Zijlstra <peterz@...radead.org>,
        "Theodore Ts'o" <tytso@....edu>,
        Marc Plumb <lkml.mplumb@...il.com>,
        Stephen Hemminger <stephen@...workplumber.org>
Subject: Re: [DRAFT PATCH] random32: make prandom_u32() output unpredictable

Linus, George, Florian,

would something in this vein be OK in your opinion?

- update_process_times still updates the noise
- we don't touch the fast_pool anymore
- we don't read any TSC on hot paths
- we update the noise in xmit from jiffies and a few pointer values instead

I've applied it on top of George's patch rebased to mainline for simplicity.
I've used a separate per_cpu noise variable to keep the net_rand_state static
with its __latent_entropy.
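
Schematically, the flow with this patch is (a condensed sketch of the
hunks below, not literal code):

  /* lib/random32.c: per-CPU noise word, fed from cheap values on hot paths */
  DEFINE_PER_CPU(unsigned long, net_rand_noise);

  /* producers (timer tick, xmit paths): cheap additions, no TSC read */
  __this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + len + jiffies);

  /* consumer, siprand_u32(): fold the per-CPU noise into the siphash state */
  n = __this_cpu_read(net_rand_noise);
  v3 ^= n;
  SIPROUND(v0, v1, v2, v3);
  SIPROUND(v0, v1, v2, v3);
  v0 ^= n;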

With this I'm even getting very slightly better performance than with
the previous patch (195.7k -> 197.8k cps).

What could be improved is the way the input values are mixed (they are
just added for now, hence commutative, so reordering the inputs yields
the same state). I didn't want to call a siphash round on the hot
paths, but simply rotating the previous noise value before adding
would be enough, for example:

  void prandom_u32_add_noise(unsigned long a, unsigned long b,
			     unsigned long c, unsigned long d)
  {
  	unsigned long *noise = get_cpu_ptr(&net_rand_noise);

  #if BITS_PER_LONG == 64
  	/* rotate the previous noise so that mixing is order-dependent */
  	*noise = rol64(*noise, 7) + a + b + c + d;
  #else
  	*noise = rol32(*noise, 7) + a + b + c + d;
  #endif
  	put_cpu_ptr(&net_rand_noise);
  }
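
If we went that route, the open-coded additions in the dev.c hunks
below could simply become calls to this helper, e.g. (hypothetical
call site):

  	prandom_u32_add_noise((unsigned long)skb, (unsigned long)dev,
  			      (unsigned long)txq, jiffies);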

Thanks,
Willy

---


diff --git a/drivers/char/random.c b/drivers/char/random.c
index d20ba1b104ca..2a41b21623ae 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1277,7 +1277,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
 
 	fast_mix(fast_pool);
 	add_interrupt_bench(cycles);
-	this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
 
 	if (unlikely(crng_init == 0)) {
 		if ((fast_pool->count >= 64) &&
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index aa16e6468f91..e2b4990f2126 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -20,7 +20,7 @@ struct rnd_state {
 	__u32 s1, s2, s3, s4;
 };
 
-DECLARE_PER_CPU(struct rnd_state, net_rand_state);
+DECLARE_PER_CPU(unsigned long, net_rand_noise);
 
 u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
@@ -67,6 +67,7 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 	state->s2 = __seed(i,   8U);
 	state->s3 = __seed(i,  16U);
 	state->s4 = __seed(i, 128U);
+	__this_cpu_add(net_rand_noise, i);
 }
 
 /* Pseudo random number generator from numerical recipes. */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ae5029f984a8..2f07c569af77 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1721,7 +1721,7 @@ void update_process_times(int user_tick)
 	 * non-constant value that's not affine to the number of calls to make
 	 * sure it's updated when there's some activity (we don't care in idle).
 	 */
-	this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
+	__this_cpu_add(net_rand_noise, rol32(jiffies, 24) + user_tick);
 }
 
 /**
diff --git a/lib/random32.c b/lib/random32.c
index 2b048e2ea99f..d74cf1db8cc9 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -320,6 +320,8 @@ struct siprand_state {
 };
 
 static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+DEFINE_PER_CPU(unsigned long, net_rand_noise);
+EXPORT_SYMBOL(net_rand_noise);
 
 #if BITS_PER_LONG == 64
 /*
@@ -334,7 +336,7 @@ static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
 #define K0 (0x736f6d6570736575 ^ 0x6c7967656e657261 )
 #define K1 (0x646f72616e646f6d ^ 0x7465646279746573 )
 
-#elif BITS_PER_LONG == 23
+#elif BITS_PER_LONG == 32
 /*
  * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
  * This is weaker, but 32-bit machines are not used for high-traffic
@@ -374,9 +376,12 @@ static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
 static u32 siprand_u32(struct siprand_state *s)
 {
 	unsigned long v0 = s->v[0], v1 = s->v[1], v2 = s->v[2], v3 = s->v[3];
+	unsigned long n = __this_cpu_read(net_rand_noise);
 
+	v3 ^= n;
 	SIPROUND(v0, v1, v2, v3);
 	SIPROUND(v0, v1, v2, v3);
+	v0 ^= n;
 	s->v[0] = v0;  s->v[1] = v1;  s->v[2] = v2;  s->v[3] = v3;
 	return v1 + v3;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 7df6c9617321..55a2471cd75b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,7 @@
 #include <linux/indirect_call_wrapper.h>
 #include <net/devlink.h>
 #include <linux/pm_runtime.h>
+#include <linux/prandom.h>
 
 #include "net-sysfs.h"
 
@@ -3557,6 +3558,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
+	__this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + len + jiffies);
 	trace_net_dev_start_xmit(skb, dev);
 	rc = netdev_start_xmit(skb, dev, txq, more);
 	trace_net_dev_xmit(skb, rc, dev, len);
@@ -4129,6 +4131,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 			if (!skb)
 				goto out;
 
+			__this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + jiffies);
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_xmit_stopped(txq)) {
@@ -4194,6 +4197,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 
 	skb_set_queue_mapping(skb, queue_id);
 	txq = skb_get_tx_queue(dev, skb);
+	__this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + jiffies);
 
 	local_bh_disable();
 
