[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1422987390-17878-7-git-send-email-bp@alien8.de>
Date: Tue, 3 Feb 2015 19:16:24 +0100
From: Borislav Petkov <bp@...en8.de>
To: X86 ML <x86@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH v1 06/12] x86, copy_page_64.S: Use generic ALTERNATIVE macro
From: Borislav Petkov <bp@...e.de>
... instead of the semi-version with the spelled out sections.
What is more, make the REP_GOOD version be the default copy_page()
version as the majority of the relevant x86 CPUs do set
X86_FEATURE_REP_GOOD. Thus, copy_page gets compiled to:
ffffffff8130af80 <copy_page>:
ffffffff8130af80: e9 0b 00 00 00 jmpq ffffffff8130af90 <copy_page_regs>
ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx
ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)
ffffffff8130af8d: c3 retq
ffffffff8130af8e: 66 90 xchg %ax,%ax
ffffffff8130af90 <copy_page_regs>:
...
and after the alternatives have run, the JMP to the old, unrolled
version gets NOPed out:
ffffffff8130af80 <copy_page>:
ffffffff8130af80: 66 66 90 xchg %ax,%ax
ffffffff8130af83: 66 90 xchg %ax,%ax
ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx
ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)
ffffffff8130af8d: c3 retq
On modern uarches, those NOPs are cheaper than the unconditional JMP
previously.
Signed-off-by: Borislav Petkov <bp@...e.de>
---
arch/x86/lib/copy_page_64.S | 37 ++++++++++++-------------------------
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
ALIGN
-copy_page_rep:
+ENTRY(copy_page)
CFI_STARTPROC
+ ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- * Could vary the prefetch distance based on SMP/UP.
-*/
+ENDPROC(copy_page)
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
CFI_STARTPROC
subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
-.Lcopy_page_end:
CFI_ENDPROC
-ENDPROC(copy_page)
-
- /* Some CPUs run faster using the string copy instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
- .Lcopy_page_end-copy_page, 2b-1b
- .previous
+ENDPROC(copy_page_regs)
--
2.2.0.33.gc18b867
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists