lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250502085223.1316925-5-ankur.a.arora@oracle.com>
Date: Fri,  2 May 2025 01:52:20 -0700
From: Ankur Arora <ankur.a.arora@...cle.com>
To: linux-kernel@...r.kernel.org, linux-arch@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org, bpf@...r.kernel.org
Cc: arnd@...db.de, catalin.marinas@....com, will@...nel.org,
        peterz@...radead.org, akpm@...ux-foundation.org, mark.rutland@....com,
        harisokn@...zon.com, cl@...two.org, ast@...nel.org, memxor@...il.com,
        zhenglifeng1@...wei.com, xueshuai@...ux.alibaba.com,
        joao.m.martins@...cle.com, boris.ostrovsky@...cle.com,
        konrad.wilk@...cle.com
Subject: [PATCH v2 4/7] arm64: barrier: add coarse wait for smp_cond_load_relaxed_timewait()

smp_cond_load_relaxed_timewait() waits for a condition on a variable
until a timeout expires. The wait is some mix of spinning while
re-reading the address, and waiting in a WFE until the CPU gets an
event, either due to a store to the address or from the periodic
event-stream.

Define __smp_cond_timewait_coarse() for use cases where the caller can
tolerate a relatively large overshoot. This allows us to minimize the
time spent spinning at the cost of spending extra time in the WFE
state.

This results in a worst-case overshoot of ARCH_TIMER_EVT_STREAM_PERIOD_US
and a spin period of no more than SMP_TIMEWAIT_CHECK_US.

Cc: Will Deacon <will@...nel.org>
Cc: Catalin Marinas <catalin.marinas@....com>
Cc: Kumar Kartikeya Dwivedi <memxor@...il.com>
Cc: Alexei Starovoitov <ast@...nel.org>
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
 arch/arm64/include/asm/barrier.h | 66 ++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index eaeb78dd48c0..f4a184a96933 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -10,6 +10,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/kasan-checks.h>
+#include <linux/minmax.h>
 
 #include <asm/alternative-macros.h>
 
@@ -219,6 +220,71 @@ do {									\
 #define __smp_timewait_store(ptr, val)		\
 		__cmpwait_relaxed(ptr, val)
 
+/*
+ * Redefine ARCH_TIMER_EVT_STREAM_PERIOD_US locally to avoid include hell.
+ */
+#define __ARCH_TIMER_EVT_STREAM_PERIOD_US 100UL
+extern bool arch_timer_evtstrm_available(void);
+
+/*
+ * For coarse grained waits, allow overshoot by the event-stream period
+ * (via the local copy above). SMP_TIMEWAIT_SPIN_BASE and _CHECK_US below
+ * are the spin-count floor and time-check threshold of ___cond_timewait().
+ */
+#define	SMP_TIMEWAIT_SLACK_COARSE_US	__ARCH_TIMER_EVT_STREAM_PERIOD_US
+
+#define SMP_TIMEWAIT_SPIN_BASE		16
+#define SMP_TIMEWAIT_CHECK_US		2UL
+
+static inline u64 ___cond_timewait(u64 now, u64 prev, u64 end,
+				      u32 *spin, bool *wait, u64 slack)
+{
+	bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT);
+	bool wfe, ev = arch_timer_evtstrm_available();
+	u64 evt_period = __ARCH_TIMER_EVT_STREAM_PERIOD_US;
+	u64 remaining = end - now;
+
+	if (now >= end)
+		return 0;
+
+	/*
+	 * Use WFE if the event-stream can deliver a wakeup within the slack,
+	 * i.e. even without a store to the address. WFXT CPUs always wait.
+	 */
+	wfe = ev && ((remaining + slack) > evt_period);
+
+	if (wfe || wfet) {
+		*wait = true;
+		*spin = 0;
+		return now;
+	}
+
+	/*
+	 * Our wait period is shorter than our best granularity. Spin.
+	 * A time-check is expensive but not too expensive, so adapt the
+	 * spin-count: double it if the last check interval was below
+	 * SMP_TIMEWAIT_CHECK_US, else cut it to ~3/4 (floor: the spin base).
+	 */
+	*wait = false;
+	if ((now - prev) < SMP_TIMEWAIT_CHECK_US)
+		*spin <<= 1;
+	else
+		*spin = max((*spin >> 1) + (*spin >> 2), SMP_TIMEWAIT_SPIN_BASE);
+	return now;
+}
+
+/*
+ * Coarse wait_policy: ___cond_timewait() with the slack fixed at
+ * SMP_TIMEWAIT_SLACK_COARSE_US. Minimizes the duration spent spinning at
+ * the cost of potentially spending the available slack in a WFE wait state.
+ * The resulting worst-case timeout overshoot is that slack (same as
+ * ARCH_TIMER_EVT_STREAM_PERIOD_US), with a spin period of no more than
+ * SMP_TIMEWAIT_CHECK_US.
+ */
+#define __smp_cond_timewait_coarse(now, prev, end, spin, wait)		\
+	___cond_timewait(now, prev, end, spin, wait,			\
+			    SMP_TIMEWAIT_SLACK_COARSE_US)
+
 #include <asm-generic/barrier.h>
 
 #endif	/* __ASSEMBLY__ */
-- 
2.43.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ