[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1586561395-50914-4-git-send-email-kyung.min.park@intel.com>
Date: Fri, 10 Apr 2020 16:29:55 -0700
From: Kyung Min Park <kyung.min.park@...el.com>
To: x86@...nel.org, linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, mingo@...hat.com, hpa@...or.com,
gregkh@...uxfoundation.org, ak@...ux.intel.com,
tony.luck@...el.com, ashok.raj@...el.com, ravi.v.shankar@...el.com,
fenghua.yu@...el.com, kyung.min.park@...el.com
Subject: [PATCH v3 3/3] x86/delay: Introduce TPAUSE delay
TPAUSE instructs the processor to enter an implementation-dependent
optimized state. The instruction execution wakes up when the time-stamp
counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
The instruction execution also wakes up due to the expiration of
the operating system time-limit or by an external interrupt
or exceptions such as a debug exception or a machine check exception.
TPAUSE offers a choice of two lower power states:
1. Light-weight power/performance optimized state C0.1
2. Improved power/performance optimized state C0.2
This way, it can save power with low wake-up latency in comparison to
spinloop based delay. The selection between the two is governed by the
input register.
TPAUSE is available on processors with X86_FEATURE_WAITPKG.
Reviewed-by: Tony Luck <tony.luck@...el.com>
Co-developed-by: Fenghua Yu <fenghua.yu@...el.com>
Signed-off-by: Fenghua Yu <fenghua.yu@...el.com>
Signed-off-by: Kyung Min Park <kyung.min.park@...el.com>
---
arch/x86/include/asm/delay.h | 1 +
arch/x86/include/asm/mwait.h | 16 ++++++++++++++++
arch/x86/kernel/time.c | 3 +++
arch/x86/lib/delay.c | 27 +++++++++++++++++++++++++++
4 files changed, 47 insertions(+)
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index bb91d7c..b57048f 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -5,6 +5,7 @@
#include <asm-generic/delay.h>
void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a43b35b..b00596b 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -22,6 +22,8 @@
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
u32 get_umwait_control_msr(void);
@@ -122,4 +124,18 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx, %edx, %eax;" */
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 106e7f8..371a6b3 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
*/
x86_init.irqs.intr_mode_init();
tsc_init();
+
+ if (static_cpu_has(X86_FEATURE_WAITPKG))
+ use_tpause_delay();
}
/*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fe91dc1..65d15df 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -97,6 +97,27 @@ static void delay_tsc(u64 cycles)
}
/*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that usleep() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
* counts with TSC frequency. The input value is the number of TSC cycles
* to wait. MWAITX will also exit when the timer expires.
@@ -156,6 +177,12 @@ void __init use_tsc_delay(void)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
delay_halt_fn = delay_halt_mwaitx;
--
2.7.4
Powered by blists - more mailing lists