[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181207183934.GF9385@zn.tnic>
Date: Fri, 7 Dec 2018 19:39:34 +0100
From: Borislav Petkov <bp@...en8.de>
To: Guenter Roeck <linux@...ck-us.net>
Cc: X86 ML <x86@...nel.org>, LKML <linux-kernel@...r.kernel.org>,
Andy Lutomirski <luto@...nel.org>,
"H. Peter Anvin" <hpa@...or.com>,
John Stultz <john.stultz@...aro.org>,
Thomas Lendacky <Thomas.Lendacky@....com>
Subject: Re: [PATCH] x86/TSC: Use RDTSCP
Ok,
I have something which looks like it works here, even with the pentium3
qemu CPU. I'll be hammering on it in the coming days but if you wanna
give it a try, here's a conglomerate patch:
---
>From 0c6adce02d2e7f3b5bbdc4cbe3eb3dae99448def Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@...e.de>
Date: Fri, 7 Dec 2018 18:54:23 +0100
Subject: [PATCH] WIP
Signed-off-by: Borislav Petkov <bp@...e.de>
---
arch/x86/include/asm/alternative.h | 26 +++++++++++++++++++++++++-
arch/x86/include/asm/msr.h | 16 ++++++++++++++--
arch/x86/kernel/alternative.c | 4 ++--
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ea9886651c39..db8ebe5dd5be 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -114,6 +114,16 @@ static inline int alternatives_text_reserved(void *start, void *end)
"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
+#define OLDINSTR_3(oldinsn, n1, n2, n3) \
+ "# ALT: oldinstr\n" \
+ "661:\n\t" oldinsn "\n662:\n" \
+ "# ALT: padding\n" \
+ ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
+ " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
+ " - (" alt_slen ")), 0x90\n" \
+ alt_end_marker ":\n"
+
#define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
@@ -122,7 +132,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
" .byte " alt_rlen(num) "\n" /* replacement len */ \
" .byte " alt_pad_len "\n" /* pad len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+ "# ALT: replacement " #num "\n" \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
/* alternative assembly primitive: */
@@ -146,6 +157,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection\n"
+#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
+ OLDINSTR_3(oldinsn, 1, 2, 3) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feat1, 1) \
+ ALTINSTR_ENTRY(feat2, 2) \
+ ALTINSTR_ENTRY(feat3, 3) \
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \
+ ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \
+ ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \
+ ".popsection\n"
+
/*
* Alternative instructions for different CPU types or capabilities.
*
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 91e4cf189914..5cc3930cb465 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void)
*/
static __always_inline unsigned long long rdtsc_ordered(void)
{
+ DECLARE_ARGS(val, low, high);
+
/*
* The RDTSC instruction is not ordered relative to memory
* access. The Intel SDM and the AMD APM are both vague on this
@@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* ordering guarantees as reading from a global memory location
* that some other imaginary CPU is updating continuously with a
* time stamp.
+ *
+ * Thus, use the preferred barrier on the respective CPU, aiming for
+ * RDTSCP as the default.
*/
- barrier_nospec();
- return rdtsc();
+ asm volatile(ALTERNATIVE_3("rdtsc",
+ "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC,
+ "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
+ "rdtscp", X86_FEATURE_RDTSCP)
+ : EAX_EDX_RET(val, low, high)
+ /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
+ :: "ecx");
+
+ return EAX_EDX_VAL(val, low, high);
}
static inline unsigned long long native_read_pmc(int counter)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ebeac487a20c..d458c7973c56 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -393,10 +393,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
+ DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
- instr, a->instrlen,
+ instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
--
2.19.1
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
Powered by blists - more mailing lists