lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1432022472-2224-3-git-send-email-ray.huang@amd.com>
Date:	Tue, 19 May 2015 16:01:10 +0800
From:	Huang Rui <ray.huang@....com>
To:	Borislav Petkov <bp@...e.de>, Len Brown <lenb@...nel.org>,
	"Rafael J. Wysocki" <rjw@...ysocki.net>,
	Thomas Gleixner <tglx@...utronix.de>
CC:	<x86@...nel.org>, <linux-kernel@...r.kernel.org>,
	Fengguang Wu <fengguang.wu@...el.com>,
	Aaron Lu <aaron.lu@...el.com>, Tony Li <tony.li@....com>,
	Huang Rui <ray.huang@....com>
Subject: [RFC PATCH 2/4] x86, mwaitt: introduce mwaitx idle with a configurable timer

MWAITX/MWAIT does not let the cpu core go into C1 state on AMD processors.
The cpu core still consumes less power while waiting, and has faster exit
from waiting than "Halt". This patch implements an interface using the
kernel parameter "idle=" to configure mwaitx type and timer value.

If "idle=mwaitx", the timeout will be set as the maximum value
((2^32 - 1) * TSC cycle).
If "idle=mwaitx,100", the timeout will be set as 100ns.
If the processor doesn't support MWAITX, then halt is used.

Signed-off-by: Huang Rui <ray.huang@....com>
---
 arch/x86/include/asm/mwait.h     |  2 +
 arch/x86/include/asm/processor.h |  2 +-
 arch/x86/kernel/process.c        | 79 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index b91136f..c4e51e7 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -14,6 +14,8 @@
 #define CPUID5_ECX_INTERRUPT_BREAK	0x2
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
+#define MWAITX_ECX_TIMER_ENABLE		0x2
+#define MWAITX_EBX_WAIT_TIMEOUT		0xffffffff
 
 static inline void __monitor(const void *eax, unsigned long ecx,
 			     unsigned long edx)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 23ba676..0f60e94 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -733,7 +733,7 @@ extern unsigned long		boot_option_idle_override;
 extern bool			amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
-			 IDLE_POLL};
+			 IDLE_POLL, IDLE_MWAITX};
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6e338e3..9d68193 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -30,6 +30,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/x86_init.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -276,6 +277,7 @@ unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
 static void (*x86_idle)(void);
+static unsigned long idle_param;
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
@@ -444,6 +446,17 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
 	return 1;
 }
 
+static int not_support_mwaitx(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_AMD)	/* non-AMD: report unsupported */
+		return 1;
+
+	if (!cpu_has(c, X86_FEATURE_MWAITT))	/* feature flag not set on this CPU */
+		return 1;
+
+	return 0;	/* 0: AMD CPU with X86_FEATURE_MWAITT, mwaitx is usable */
+}
+
 /*
  * MONITOR/MWAIT with no hints, used for default default C1 state.
  * This invokes MWAIT with interrutps enabled and no flags,
@@ -470,12 +483,45 @@ static void mwait_idle(void)
 	__current_clr_polling();
 }
 
+/*
+ * Idle routine using MONITORX/MWAITX, which AMD Excavator processors
+ * support. It is similar to mwait but with a timer: ECX is set to
+ * MWAITX_ECX_TIMER_ENABLE and EBX carries the timeout held in idle_param,
+ * which is configured via the "idle=mwaitx[,<nsec>]" kernel parameter.
+ * On AMD platforms mwaitx does not put the core into C1, so exit is faster.
+ */
+static void mwaitx_idle(void)
+{
+	unsigned long ebx, ecx;
+
+	ebx = idle_param;
+	ecx = MWAITX_ECX_TIMER_ENABLE;
+
+	if (!current_set_polling_and_test()) {
+		__monitorx((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__sti_mwaitx(0, ebx, ecx);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
+	__current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
 		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
 #endif
+
+	if (boot_option_idle_override == IDLE_MWAITX &&
+	    not_support_mwaitx(c)) {
+		pr_warn_once("WARNING: mwaitx not supported, using default idle support\n");
+		x86_idle = default_idle;
+	}
+
 	if (x86_idle || boot_option_idle_override == IDLE_POLL)
 		return;
 
@@ -499,6 +545,8 @@ void __init init_amd_e400_c1e_mask(void)
 
 static int __init idle_setup(char *str)
 {
+	unsigned long timeout, tsc_freq;
+
 	if (!str)
 		return -EINVAL;
 
@@ -524,6 +572,37 @@ static int __init idle_setup(char *str)
 		 * of boot_option_idle_override.
 		 */
 		boot_option_idle_override = IDLE_NOMWAIT;
+	} else if (!strncmp(str, "mwaitx", 6)) {
+		/*
+		 * If the boot option of "idle=mwaitx" is added, it means
+		 * that mwaitx will be enabled if current processor
+		 * supports it. If not supported, use default_idle.
+		 */
+		x86_idle = mwaitx_idle;
+		boot_option_idle_override = IDLE_MWAITX;
+		str += 6;
+		if (str && (str[0] == ',')) {
+			if (kstrtoul(str + 1, 0, &timeout)) {
+				pr_warn_once("WARNING: timer value should be numerical\n");
+				return -1;
+			}
+
+			tsc_freq = x86_platform.calibrate_tsc();
+			if (!tsc_freq) {
+				pr_warn_once("WARNING: can not calculate TSC khz\n");
+				return -1;
+			}
+
+			/*
+			 * TSC loops (EBX input) = Timer(nsec) *
+			 * TSC freq(khz) / 1000000
+			 */
+			timeout = timeout * tsc_freq;
+			do_div(timeout, 1000000);
+
+			idle_param = timeout;
+		} else
+			idle_param = MWAITX_EBX_WAIT_TIMEOUT;
 	} else
 		return -1;
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ