Message-ID: <46C69D26.3000200@cosmosbay.com>
Date:	Sat, 18 Aug 2007 09:17:58 +0200
From:	Eric Dumazet <dada1@...mosbay.com>
To:	Stephen Hemminger <shemminger@...ux-foundation.org>
CC:	Andi Kleen <ak@...e.de>, discuss@...-64.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH] x86-64: memset optimization

Stephen Hemminger wrote:
> Optimize uses of memset with small constant offsets.
> This will generate smaller code, and avoid the slow rep/string instructions.
> Code copied from i386 with a little cleanup.
> 

You obviously didn't test it, did you?

How can you be sure it is going to speed things up, then?

> Signed-off-by: Stephen Hemminger <shemminger@...ux-foundation.org>
> 
> --- a/include/asm-x86_64/string.h	2007-08-17 15:14:32.000000000 -0700
> +++ b/include/asm-x86_64/string.h	2007-08-17 15:36:30.000000000 -0700
> @@ -42,9 +42,51 @@ extern void *__memcpy(void *to, const vo
>  		 __ret = __builtin_memcpy((dst),(src),__len);	\
>  	   __ret; }) 
>  #endif
> -
>  #define __HAVE_ARCH_MEMSET
> -void *memset(void *s, int c, size_t n);
> +void *__memset(void *s, int c, size_t n);
> +
> +/* Optimize for cases of trivial memset's
> + * Compiler should optimize away all but the case used.
> + */
> +static __always_inline void *
> +__constant_c_and_count_memset(void *s, int c, size_t count)
> +{
> +	unsigned long pattern = 0x01010101UL * (unsigned char) c;

The main difference between x86_64 and i386 is that sizeof(long) is 8 instead of 4: the 0x01010101UL pattern copied from i386 only replicates c into the low four bytes, and every unsigned long store above writes eight bytes.

Why not let gcc do its job for memset()?

On x86_64 at least, modern gcc is smart enough.
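
For illustration only (a sketch, not code from the patch), the constant and the small-count stores would have to look more like this once unsigned long is 8 bytes wide:

/*
 * Sketch of a 64-bit-aware variant of the small-count cases; the other
 * cases stay as in the patch.  This only shows what sizeof(long) == 8
 * implies for the pattern and the store widths.
 */
static __always_inline void *
__constant_c_and_count_memset(void *s, int c, size_t count)
{
	/* replicate c into all 8 bytes, not just the low 4 */
	unsigned long pattern = 0x0101010101010101UL * (unsigned char)c;

	switch (count) {
	case 4:
		*(unsigned int *)s = pattern;	/* 4-byte store, not an 8-byte one */
		return s;
	case 8:
		*(unsigned long *)s = pattern;	/* a single 8-byte store */
		return s;
	default:
		return __memset(s, c, count);
	}
}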

> +
> +	switch (count) {
> +	case 0:
> +		return s;
> +	case 1:
> +		*(unsigned char *)s = pattern;
> +		return s;
> +	case 2:
> +		*(unsigned short *)s = pattern;
> +		return s;
> +	case 3:
> +		*(unsigned short *)s = pattern;
> +		*(2+(unsigned char *)s) = pattern;
> +		return s;
> +	case 4:
> +		*(unsigned long *)s = pattern;
> +		return s;
> +	case 6:
> +		*(unsigned long *)s = pattern;
> +		*(2+(unsigned short *)s) = pattern;
> +		return s;
> +	case 8:
> +		*(unsigned long *)s = pattern;
> +		*(1+(unsigned long *)s) = pattern;
> +		return s;
> +	default:
> +		return __memset(s, c, count);
> +	}
> +}
> +#define memset(s, c, count)					\
> +	(__builtin_constant_p(c)				\
> +	 ? __constant_c_and_count_memset((s),(c),(count))	\
> +	 : __memset((s),(c),(count)))
> +
> +
>  
>  #define __HAVE_ARCH_MEMMOVE
>  void * memmove(void * dest,const void *src,size_t count);
> --- a/arch/x86_64/kernel/x8664_ksyms.c	2007-08-17 15:14:32.000000000 -0700
> +++ b/arch/x86_64/kernel/x8664_ksyms.c	2007-08-17 15:44:58.000000000 -0700
> @@ -48,10 +48,12 @@ EXPORT_SYMBOL(__read_lock_failed);
>  #undef memmove
>  
>  extern void * memset(void *,int,__kernel_size_t);
> +extern void * __memset(void *,int,__kernel_size_t);
>  extern void * memcpy(void *,const void *,__kernel_size_t);
>  extern void * __memcpy(void *,const void *,__kernel_size_t);
>  
>  EXPORT_SYMBOL(memset);
> +EXPORT_SYMBOL(__memset);
>  EXPORT_SYMBOL(memcpy);
>  EXPORT_SYMBOL(__memcpy);
>  

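For what it's worth, a minimal sketch of the second point (assuming a reasonably recent gcc; this is not part of the patch): with a constant size, a plain memset() call is already expanded inline on x86_64 without any arch macro at all:

/* Compile with gcc -O2 -S on x86_64: the memset() below becomes a single
 * 8-byte store (movq $0, (%rdi)); no call to memset() is emitted. */
#include <string.h>

struct hdr {
	unsigned long flags;
};

void clear_hdr(struct hdr *h)
{
	memset(h, 0, sizeof(*h));
}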
