Message-ID: <20251127092226.1439196-12-ardb+git@google.com>
Date: Thu, 27 Nov 2025 10:22:31 +0100
From: Ard Biesheuvel <ardb+git@...gle.com>
To: linux-hardening@...r.kernel.org
Cc: linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
Ard Biesheuvel <ardb@...nel.org>, Kees Cook <kees@...nel.org>, Ryan Roberts <ryan.roberts@....com>,
Will Deacon <will@...nel.org>, Arnd Bergmann <arnd@...db.de>, Jeremy Linton <jeremy.linton@....com>,
Catalin Marinas <Catalin.Marinas@....com>, Mark Rutland <mark.rutland@....com>,
"Jason A. Donenfeld" <Jason@...c4.com>
Subject: [RFC/RFT PATCH 4/6] random: Use a lockless fast path for get_random_uXX()
From: Ard Biesheuvel <ardb@...nel.org>
Currently, the implementations of the get_random_uXX() API protect their
critical section by taking a local lock and disabling interrupts, to
ensure that the code does not race with itself when called from
interrupt context.
Given that the fast path does nothing more than read a single uXX
quantity from a linear buffer and bump the position pointer, poking the
hardware registers to disable and re-enable interrupts is
disproportionately costly, and best avoided.
There are two conditions under which the batched entropy buffer is
replenished, which is what forms the critical section:
- the buffer is exhausted;
- the base_crng generation counter has incremented.
By combining the position and generation counters into a single u64, we
can use compare and exchange to implement the fast path without taking
the local lock or disabling interrupts. By constructing the expected and
next values carefully, the compare and exchange will only succeed if
- we did not race with ourselves, i.e., the compare and exchange
increments the position counter by exactly 1;
- the buffer is not exhausted;
- the generation counter equals the base_crng generation counter.
Only if the compare and exchange fails is the original slow path taken,
and only in that case do we take the local lock. This results in a
considerable speedup (3-5x) when benchmarking get_random_u8() in a tight
loop.
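As an illustration of the scheme, below is a minimal, self-contained
userspace sketch. The names, the buffer size, the refill helper and the
retry loop are simplified stand-ins invented for this illustration only;
the actual patch operates on the per-CPU batch, uses cmpxchg64_local()
and falls back to the existing local_lock_irqsave() slow path.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define BATCH_LEN 96u           /* illustrative; the driver sizes this from CHACHA_BLOCK_SIZE */

struct batch {
        uint8_t entropy[BATCH_LEN];
        _Atomic uint64_t posgen;        /* generation in bits 63..32, position in bits 31..0 */
};

static uint32_t crng_generation;        /* stands in for base_crng.generation */

static void refill(struct batch *b, uint32_t gen)
{
        /* stands in for _get_random_bytes(); deterministic filler for the sketch */
        for (uint32_t i = 0; i < BATCH_LEN; i++)
                b->entropy[i] = (uint8_t)(i + 1);
        atomic_store(&b->posgen, (uint64_t)gen << 32);  /* position 0, current generation */
}

static uint8_t get_random_u8_sketch(struct batch *b)
{
        for (;;) {
                uint32_t gen = crng_generation;
                uint32_t pos = (uint32_t)atomic_load(&b->posgen);
                uint64_t next = (uint64_t)gen << 32;
                uint8_t ret = 0;

                if (pos < BATCH_LEN) {
                        /* otherwise next - 1 underflows into the generation bits and can never match */
                        next |= (uint64_t)pos + 1;
                        ret = b->entropy[pos];
                }

                /*
                 * Succeeds only if posgen still holds (gen, pos): we did not race,
                 * the buffer is not exhausted and the generation is current.
                 */
                uint64_t expected = next - 1;
                if (atomic_compare_exchange_strong(&b->posgen, &expected, next)) {
                        b->entropy[pos] = 0;
                        return ret;
                }

                /* Slow path stand-in: refill and retry (locked in the real driver). */
                refill(b, gen);
        }
}

int main(void)
{
        /* position UINT32_MAX forces a refill on first use */
        struct batch b = { .posgen = UINT32_MAX };

        for (int i = 0; i < 4; i++)
                printf("%u\n", (unsigned int)get_random_u8_sketch(&b));
        return 0;
}

Unlike the sketch, the real slow path re-checks the position and
generation under the local lock before refilling, so a failed fast path
does not always trigger a refill.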
Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
drivers/char/random.c | 44 ++++++++++++++------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0e04bc60d034..71bd74871540 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -496,6 +496,12 @@ static ssize_t get_random_bytes_user(struct iov_iter *iter)
* should be called and return 0 at least once at any point prior.
*/
+#ifdef __LITTLE_ENDIAN
+#define LOHI(lo, hi) lo, hi
+#else
+#define LOHI(lo, hi) hi, lo
+#endif
+
#define DEFINE_BATCHED_ENTROPY(type) \
struct batch_ ##type { \
/* \
@@ -507,8 +513,12 @@ struct batch_ ##type { \
*/ \
type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \
local_lock_t lock; \
- unsigned int generation; \
- unsigned int position; \
+ union { \
+ struct { \
+ unsigned int LOHI(position, generation); \
+ }; \
+ u64 posgen; \
+ }; \
}; \
\
static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \
@@ -522,6 +532,7 @@ type get_random_ ##type(void) \
unsigned long flags; \
struct batch_ ##type *batch; \
unsigned int next_gen; \
+ u64 next; \
\
warn_unseeded_randomness(); \
\
@@ -530,21 +541,28 @@ type get_random_ ##type(void) \
return ret; \
} \
\
- local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \
- batch = raw_cpu_ptr(&batched_entropy_##type); \
+ batch = &get_cpu_var(batched_entropy_##type); \
\
next_gen = (unsigned int)READ_ONCE(base_crng.generation); \
- if (batch->position >= ARRAY_SIZE(batch->entropy) || \
- next_gen != batch->generation) { \
- _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \
- batch->position = 0; \
- batch->generation = next_gen; \
+ next = (u64)next_gen << 32; \
+ if (likely(batch->position < ARRAY_SIZE(batch->entropy))) { \
+ next |= batch->position + 1; /* next-1 is bogus otherwise */ \
+ ret = batch->entropy[batch->position]; \
+ } \
+ if (cmpxchg64_local(&batch->posgen, next - 1, next) != next - 1) { \
+ local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \
+ if (batch->position >= ARRAY_SIZE(batch->entropy) || \
+ next_gen != batch->generation) { \
+ _get_random_bytes(batch->entropy, sizeof(batch->entropy));\
+ batch->position = 0; \
+ batch->generation = next_gen; \
+ } \
+ ret = batch->entropy[batch->position++]; \
+ local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \
} \
\
- ret = batch->entropy[batch->position]; \
- batch->entropy[batch->position] = 0; \
- ++batch->position; \
- local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \
+ batch->entropy[batch->position - 1] = 0; \
+ put_cpu_var(batched_entropy_##type); \
return ret; \
} \
EXPORT_SYMBOL(get_random_ ##type);
--
2.52.0.107.ga0afd4fd5b-goog