lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 22 Apr 2009 16:49:08 -0700
From:	Joe Damato <ice799@...il.com>
To:	Andi Kleen <andi@...stfloor.org>
Cc:	Ingo Molnar <mingo@...e.hu>, Jeff Garzik <jeff@...zik.org>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	"H. Peter Anvin" <hpa@...or.com>, x86@...nel.org
Subject: Re: [PATCH] X86-32: Let gcc decide whether to inline memcpy was Re: 
	New x86 warning

On Wed, Apr 22, 2009 at 1:45 AM, Andi Kleen <andi@...stfloor.org> wrote:
> Ingo Molnar <mingo@...e.hu> writes:
>
>> * Jeff Garzik <jeff@...zik.org> wrote:
>>
>>> On x86-32, this warning now appears for me in 2.6.30-rc3, and did
>>> not appear in 2.6.29.
>>>
>>> drivers/acpi/acpica/tbfadt.c: In function 'acpi_tb_create_local_fadt':
>>> /spare/repo/linux-2.6/arch/x86/include/asm/string_32.h:75: warning:
>>> array subscript is above array bounds
>>
>> Last i checked it was a GCC bounds check bogosity. All attempts to
>> work it around or annotate it sanely (without changing the assembly
>> code) failed. (new ideas welcome)
>>
>> The closest i came was the hacklet below to the assembly code. [with
>> an intentionally corrupted patch header to make it harder to apply
>> accidentally.]
>
> Modern gcc (and that is all that is supported now) should be able to
> generate this code on its own already.  So if you call __builtin_* it
> will  just work (that is what 64bit does) without that explicit code.
>
> Here's a patch that does that with some numbers. It gives about 3k
> smaller kernels on my configuration.
>
> IMHO it's long overdue to do this for 32bit too.
>
> It's a very attractive patch because it removes a lot of code:

I think this patch is great. Perhaps this would be a good time to also
clean out memset for x86_32? (If needed, I can start a new email
thread with this patch) I've built and booted this on my x86_32
hardware.

[danger: a newb's patch below]

Joe

--

Define memset to __builtin_memset and gcc should do the right thing.
Keep around the generic memset routine that gcc can use when it does
not inline. Removes a bunch of ugly code, too.

Signed-off-by: Joe Damato <ice799@...il.com>
---
 arch/x86/include/asm/string_32.h |  117 +-------------------------------------
 arch/x86/lib/memcpy_32.c         |   12 ++++
 2 files changed, 15 insertions(+), 114 deletions(-)

diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 29fff54..aedee80 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -30,7 +30,6 @@ extern char *strchr(const char *s, int c);
 extern size_t strlen(const char *s);

 #define __HAVE_ARCH_MEMCPY
-
 extern void *__memcpy(void *to, const void *from, size_t n);

 #ifdef CONFIG_X86_USE_3DNOW
@@ -79,42 +78,8 @@ void *memmove(void *dest, const void *src, size_t n);
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *cs, int c, size_t count);

-static inline void *__memset_generic(void *s, char c, size_t count)
-{
-	int d0, d1;
-	asm volatile("rep\n\t"
-		     "stosb"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (c), "1" (s), "0" (count)
-		     : "memory");
-	return s;
-}
-
-/* we might want to write optimized versions of these later */
-#define __constant_count_memset(s, c, count) __memset_generic((s),
(c), (count))
-
-/*
- * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static __always_inline
-void *__constant_c_memset(void *s, unsigned long c, size_t count)
-{
-	int d0, d1;
-	asm volatile("rep ; stosl\n\t"
-		     "testb $2,%b3\n\t"
-		     "je 1f\n\t"
-		     "stosw\n"
-		     "1:\ttestb $1,%b3\n\t"
-		     "je 2f\n\t"
-		     "stosb\n"
-		     "2:"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
-		     : "memory");
-	return s;
-}
+#define __HAVE_ARCH_MEMSET
+extern void *__memset(void *s, char c, size_t count);

 /* Added by Gertjan van Wingerde to make minix and sysv module work */
 #define __HAVE_ARCH_STRNLEN
@@ -124,83 +89,7 @@ extern size_t strnlen(const char *s, size_t count);
 #define __HAVE_ARCH_STRSTR
 extern char *strstr(const char *cs, const char *ct);

-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as we by now know that both pattern and count is constant..
- */
-static __always_inline
-void *__constant_c_and_count_memset(void *s, unsigned long pattern,
-				    size_t count)
-{
-	switch (count) {
-	case 0:
-		return s;
-	case 1:
-		*(unsigned char *)s = pattern & 0xff;
-		return s;
-	case 2:
-		*(unsigned short *)s = pattern & 0xffff;
-		return s;
-	case 3:
-		*(unsigned short *)s = pattern & 0xffff;
-		*((unsigned char *)s + 2) = pattern & 0xff;
-		return s;
-	case 4:
-		*(unsigned long *)s = pattern;
-		return s;
-	}
-
-#define COMMON(x)							\
-	asm volatile("rep ; stosl"					\
-		     x							\
-		     : "=&c" (d0), "=&D" (d1)				\
-		     : "a" (eax), "0" (count/4), "1" ((long)s)	\
-		     : "memory")
-
-	{
-		int d0, d1;
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
-		/* Workaround for broken gcc 4.0 */
-		register unsigned long eax asm("%eax") = pattern;
-#else
-		unsigned long eax = pattern;
-#endif
-
-		switch (count % 4) {
-		case 0:
-			COMMON("");
-			return s;
-		case 1:
-			COMMON("\n\tstosb");
-			return s;
-		case 2:
-			COMMON("\n\tstosw");
-			return s;
-		default:
-			COMMON("\n\tstosw\n\tstosb");
-			return s;
-		}
-	}
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count)			\
-	(__builtin_constant_p(count)				\
-	 ? __constant_c_and_count_memset((s), (c), (count))	\
-	 : __constant_c_memset((s), (c), (count)))
-
-#define __memset(s, c, count)				\
-	(__builtin_constant_p(count)			\
-	 ? __constant_count_memset((s), (c), (count))	\
-	 : __memset_generic((s), (c), (count)))
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, count)						\
-	(__builtin_constant_p(c)					\
-	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
-				 (count))				\
-	 : __memset((s), (c), (count)))
+#define memset(s, c, count) __builtin_memset(s, c, count)

 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 16dc123..16b10d9 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -30,6 +30,18 @@ void *memcpy(void *to, const void *from, size_t n)
 }
 EXPORT_SYMBOL(memcpy);

+void *__memset(void *s, char c, size_t count)
+{
+	int d0, d1;
+	asm volatile("rep\n\t"
+		     "stosb"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "1" (s), "0" (count)
+		     : "memory");
+	return s;
+}
+EXPORT_SYMBOL(__memset);
+
 void *memset(void *s, int c, size_t count)
 {
 	return __memset(s, c, count);
-- 
1.6.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ