lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1584677604-32707-3-git-send-email-kyung.min.park@intel.com>
Date:   Thu, 19 Mar 2020 21:13:24 -0700
From:   Kyung Min Park <kyung.min.park@...el.com>
To:     x86@...nel.org, linux-kernel@...r.kernel.org
Cc:     tglx@...utronix.de, mingo@...hat.com, hpa@...or.com,
        gregkh@...uxfoundation.org, ak@...ux.intel.com,
        tony.luck@...el.com, ashok.raj@...el.com, ravi.v.shankar@...el.com,
        fenghua.yu@...el.com, kyung.min.park@...el.com
Subject: [PATCH v2 2/2] x86/delay: Introduce TPAUSE delay

TPAUSE instructs the processor to enter an implementation-dependent
optimized state. The processor wakes up when the time-stamp counter
reaches or exceeds the 64-bit value passed implicitly in EDX:EAX.
It also wakes up when the operating-system time limit expires, or on
an external interrupt or an exception such as a debug exception or a
machine check exception.

TPAUSE offers a choice of two lower power states:
 1. Light-weight power/performance optimized state C0.1
 2. Improved power/performance optimized state C0.2
This way, it can save power with low wake-up latency in comparison to
spinloop based delay. The selection between the two is governed by the
input register.

TPAUSE is available on processors with X86_FEATURE_WAITPKG.

Reviewed-by: Tony Luck <tony.luck@...el.com>
Co-developed-by: Fenghua Yu <fenghua.yu@...el.com>
Signed-off-by: Fenghua Yu <fenghua.yu@...el.com>
Signed-off-by: Kyung Min Park <kyung.min.park@...el.com>
---
 arch/x86/include/asm/mwait.h | 17 +++++++++++++++++
 arch/x86/lib/delay.c         | 27 ++++++++++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index aaf6643..fd59db0 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -22,6 +22,8 @@
 #define MWAITX_ECX_TIMER_ENABLE		BIT(1)
 #define MWAITX_MAX_WAIT_CYCLES		UINT_MAX
 #define MWAITX_DISABLE_CSTATES		0xf0
+#define TPAUSE_C01_STATE		1
+#define TPAUSE_C02_STATE		0
 
 static inline void __monitor(const void *eax, unsigned long ecx,
 			     unsigned long edx)
@@ -120,4 +122,19 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 	current_clr_polling();
 }
 
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(unsigned int ecx, unsigned int edx,
+			    unsigned int eax)
+{
+	/* "tpause %ecx, %edx, %eax;" */
+	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+		     :
+		     : "c"(ecx), "d"(edx), "a"(eax));
+}
+
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e6db855..5f11f0a 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -97,6 +97,27 @@ static void delay_tsc(u64 cycles)
 }
 
 /*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter reaches or exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+	u64 until = start + cycles;
+	unsigned int eax, edx;
+
+	eax = (unsigned int)(until & 0xffffffff);
+	edx = (unsigned int)(until >> 32);
+
+	/*
+	 * Hard code the deeper (C0.2) sleep state because exit latency is
+	 * small compared to the "microseconds" that usleep() will delay.
+	 */
+	__tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
  * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
  * counts with TSC frequency. The input value is the number of TSC cycles
  * to wait. MWAITX will also exit when the timer expires.
@@ -152,8 +173,12 @@ static void delay_halt(u64 __cycles)
 
 void use_tsc_delay(void)
 {
-	if (delay_fn == delay_loop)
+	if (static_cpu_has(X86_FEATURE_WAITPKG)) {
+		delay_halt_fn = delay_halt_tpause;
+		delay_fn = delay_halt;
+	} else if (delay_fn == delay_loop) {
 		delay_fn = delay_tsc;
+	}
 }
 
 void use_mwaitx_delay(void)
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ