[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <tip-1405ae250ec86802b32ca9f7aea977a5ab551b22@git.kernel.org>
Date: Wed, 22 Apr 2009 18:00:31 GMT
From: tip-bot for Andi Kleen <andi@...stfloor.org>
To: linux-tip-commits@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, hpa@...or.com, mingo@...hat.com,
andi@...stfloor.org, ak@...ux.intel.com, tglx@...utronix.de
Subject: [tip:x86/asm] x86: use __builtin_memcpy() on 32 bits
Commit-ID: 1405ae250ec86802b32ca9f7aea977a5ab551b22
Gitweb: http://git.kernel.org/tip/1405ae250ec86802b32ca9f7aea977a5ab551b22
Author: Andi Kleen <andi@...stfloor.org>
AuthorDate: Wed, 22 Apr 2009 10:45:15 +0200
Committer: H. Peter Anvin <hpa@...or.com>
CommitDate: Wed, 22 Apr 2009 10:55:20 -0700
x86: use __builtin_memcpy() on 32 bits
Modern gccs have their own heuristics to decide whether string functions
should be inlined or not. This used to be not the case with old gccs,
but Linux doesn't support them anymore. The 64bit kernel always did it
this way. Just define memcpy to __builtin_memcpy and gcc should do the
right thing. Also supply an out-of-line memcpy that gcc can fall back
to when it decides not to inline.
First this fixes the
arch/x86/include/asm/string_32.h:75: warning: array subscript is above array bounds
warnings which have been creeping up recently by just
removing that code.
Then trusting gcc actually makes the kernel smaller by nearly 3K:
5503146 529444 1495040 7527630 72dcce vmlinux
5500373 529444 1495040 7524857 72d1f9 vmlinux-string
Also it removes some quite ugly code and will likely speed up
compilation by a tiny bit by having less inline code to process
for every file.
I did some quick boot tests and everything worked as expected.
I left the 3dnow case alone for now.
[ Impact: fixes warning, reduces code size ]
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
LKML-Reference: <8763gxoz50.fsf_-_@...il.nowhere.org>
Signed-off-by: H. Peter Anvin <hpa@...or.com>
---
arch/x86/include/asm/string_32.h | 127 ++-----------------------------------
arch/x86/lib/memcpy_32.c | 16 +++++
2 files changed, 23 insertions(+), 120 deletions(-)
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 0e0e3ba..29fff54 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -29,121 +29,10 @@ extern char *strchr(const char *s, int c);
#define __HAVE_ARCH_STRLEN
extern size_t strlen(const char *s);
-static __always_inline void *__memcpy(void *to, const void *from, size_t n)
-{
- int d0, d1, d2;
- asm volatile("rep ; movsl\n\t"
- "movl %4,%%ecx\n\t"
- "andl $3,%%ecx\n\t"
- "jz 1f\n\t"
- "rep ; movsb\n\t"
- "1:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
- : "memory");
- return to;
-}
-
-/*
- * This looks ugly, but the compiler can optimize it totally,
- * as the count is constant.
- */
-static __always_inline void *__constant_memcpy(void *to, const void *from,
- size_t n)
-{
- long esi, edi;
- if (!n)
- return to;
-
- switch (n) {
- case 1:
- *(char *)to = *(char *)from;
- return to;
- case 2:
- *(short *)to = *(short *)from;
- return to;
- case 4:
- *(int *)to = *(int *)from;
- return to;
-
- case 3:
- *(short *)to = *(short *)from;
- *((char *)to + 2) = *((char *)from + 2);
- return to;
- case 5:
- *(int *)to = *(int *)from;
- *((char *)to + 4) = *((char *)from + 4);
- return to;
- case 6:
- *(int *)to = *(int *)from;
- *((short *)to + 2) = *((short *)from + 2);
- return to;
- case 8:
- *(int *)to = *(int *)from;
- *((int *)to + 1) = *((int *)from + 1);
- return to;
- }
-
- esi = (long)from;
- edi = (long)to;
- if (n >= 5 * 4) {
- /* large block: use rep prefix */
- int ecx;
- asm volatile("rep ; movsl"
- : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
- : "0" (n / 4), "1" (edi), "2" (esi)
- : "memory"
- );
- } else {
- /* small block: don't clobber ecx + smaller code */
- if (n >= 4 * 4)
- asm volatile("movsl"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- if (n >= 3 * 4)
- asm volatile("movsl"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- if (n >= 2 * 4)
- asm volatile("movsl"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- if (n >= 1 * 4)
- asm volatile("movsl"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- }
- switch (n % 4) {
- /* tail */
- case 0:
- return to;
- case 1:
- asm volatile("movsb"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- return to;
- case 2:
- asm volatile("movsw"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- return to;
- default:
- asm volatile("movsw\n\tmovsb"
- : "=&D"(edi), "=&S"(esi)
- : "0"(edi), "1"(esi)
- : "memory");
- return to;
- }
-}
-
#define __HAVE_ARCH_MEMCPY
+extern void *__memcpy(void *to, const void *from, size_t n);
+
#ifdef CONFIG_X86_USE_3DNOW
#include <asm/mmx.h>
@@ -155,7 +44,7 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
{
if (len < 512)
- return __constant_memcpy(to, from, len);
+ return __memcpy(to, from, len);
return _mmx_memcpy(to, from, len);
}
@@ -168,20 +57,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
#define memcpy(t, f, n) \
(__builtin_constant_p((n)) \
- ? __constant_memcpy3d((t), (f), (n)) \
+ ? __builtin_memcpy((t), (f), (n)) \
: __memcpy3d((t), (f), (n)))
#else
/*
* No 3D Now!
+ *
+ * Let gcc figure it out.
*/
-#define memcpy(t, f, n) \
- (__builtin_constant_p((n)) \
- ? __constant_memcpy((t), (f), (n)) \
- : __memcpy((t), (f), (n)))
-
+#define memcpy(t, f, n) __builtin_memcpy(t,f,n)
#endif
#define __HAVE_ARCH_MEMMOVE
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d..16dc123 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -4,6 +4,22 @@
#undef memcpy
#undef memset
+void *__memcpy(void *to, const void *from, size_t n)
+{
+ int d0, d1, d2;
+ asm volatile("rep ; movsl\n\t"
+ "movl %4,%%ecx\n\t"
+ "andl $3,%%ecx\n\t"
+ "jz 1f\n\t"
+ "rep ; movsb\n\t"
+ "1:"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+ : "memory");
+ return to;
+}
+EXPORT_SYMBOL(__memcpy);
+
void *memcpy(void *to, const void *from, size_t n)
{
#ifdef CONFIG_X86_USE_3DNOW
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists