lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200224204105.GA24543@agluck-desk2.amr.corp.intel.com>
Date:   Mon, 24 Feb 2020 12:41:05 -0800
From:   "Luck, Tony" <tony.luck@...el.com>
To:     "Theodore Y. Ts'o" <tytso@....edu>
Cc:     "Jason A. Donenfeld" <Jason@...c4.com>,
        Tony Luck <tony.luck@...il.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] random: always use batched entropy for
 get_random_u{32,64}

On Fri, Feb 21, 2020 at 07:41:33PM -0500, Theodore Y. Ts'o wrote:
> On Fri, Feb 21, 2020 at 09:08:19PM +0100, Jason A. Donenfeld wrote:
> > On Thu, Feb 20, 2020 at 11:29 PM Tony Luck <tony.luck@...il.com> wrote:
> > >
> > > Could we just disable interrupts and pre-emption around the entropy extraction?
> > 
> > Probably, yes... We can address this in a separate patch.
> 
> No, we can't; take a look at invalidate_batched_entropy(), where we
> need invalidate all of per-cpu batched entropy from a single CPU after
> we have initialized the the CRNG.
> 
> Since most of the time after CRNG initialization, the spinlock for
> each CPU will be on that CPU's cacheline, the time to take and release
> the spinlock is not going to be material.

So we could get rid of the spin lock by replacing it with a "bool"
that is written when we want to do an invalidate on the next call
(where it is read and cleared).

For me it makes a 15 cycle difference (56 vs. 71) for the fast
case when we are just picking a value from the array. The slow
path when we do extract_crng() is barely changed (731 vs 736 cycles).

But I took the "do lazily" comment above invalidate_batched_entropy()
very literally and didn't add any fences to make sure that readers
of need_invalidate see the store ASAP. So a close race where the
invalidate request would have won control of the spin lock might
not get processed until a subsequent call.

If you think a fence is needed, then the advantage will be lost
and the below patch is worthless.

-Tony

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a6b77a850ddd..6fb222996ea4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2144,7 +2144,7 @@ struct batched_entropy {
 		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
 	};
 	unsigned int position;
-	spinlock_t batch_lock;
+	bool need_invalidate;
 };
 
 /*
@@ -2155,9 +2155,7 @@ struct batched_entropy {
  * wait_for_random_bytes() should be called and return 0 at least once at any
  * point prior.
  */
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.batch_lock	= __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
-};
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 
 u64 get_random_u64(void)
 {
@@ -2168,21 +2166,23 @@ u64 get_random_u64(void)
 
 	warn_unseeded_randomness(&previous);
 
+	local_irq_save(flags);
+	preempt_disable();
 	batch = raw_cpu_ptr(&batched_entropy_u64);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+	if (batch->need_invalidate ||
+	    batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
 		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
+		batch->need_invalidate = false;
 	}
 	ret = batch->entropy_u64[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	preempt_enable();
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u64);
 
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.batch_lock	= __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
-};
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
 	u32 ret;
@@ -2192,14 +2192,18 @@ u32 get_random_u32(void)
 
 	warn_unseeded_randomness(&previous);
 
+	local_irq_save(flags);
+	preempt_disable();
 	batch = raw_cpu_ptr(&batched_entropy_u32);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+	if (batch->need_invalidate ||
+	    batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
 		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
+		batch->need_invalidate = false;
 	}
 	ret = batch->entropy_u32[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	preempt_enable();
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u32);
@@ -2217,14 +2221,10 @@ static void invalidate_batched_entropy(void)
 		struct batched_entropy *batched_entropy;
 
 		batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
-		spin_lock_irqsave(&batched_entropy->batch_lock, flags);
-		batched_entropy->position = 0;
-		spin_unlock(&batched_entropy->batch_lock);
+		batched_entropy->need_invalidate = true;
 
 		batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
-		spin_lock(&batched_entropy->batch_lock);
-		batched_entropy->position = 0;
-		spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
+		batched_entropy->need_invalidate = true;
 	}
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ