Message-ID: <c9840e69-aa7f-4ba5-a981-e330f4fba297@email.android.com>
Date:	Fri, 02 Nov 2012 10:30:08 -0700
From:	"H. Peter Anvin" <hpa@...or.com>
To:	Jan Beulich <JBeulich@...e.com>, mingo@...e.hu, tglx@...utronix.de
CC:	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/3, v2] x86/xor: make virtualization friendly

Aren't we actually talking just about PV here?

If so, the test is wrong.
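
As a rough illustration of that distinction (a sketch only; the helper
names are assumed from the 3.7-era tree): cpu_has_hypervisor expands to
boot_cpu_has(X86_FEATURE_HYPERVISOR) and is set for any guest type,
whereas a PV-only test would need something narrower, e.g.
xen_pv_domain().

#include <linux/types.h>	/* bool */
#include <asm/cpufeature.h>	/* boot_cpu_has(), X86_FEATURE_HYPERVISOR */
#include <xen/xen.h>		/* xen_pv_domain() */

/* Sketch, not part of the patch: what the two tests distinguish. */
static bool running_under_any_hypervisor(void)
{
	/* True for every guest type: KVM, Xen HVM, Xen PV, VMware, ... */
	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
}

static bool running_as_xen_pv_guest(void)
{
	/*
	 * Only a PV guest has to leave privileged CR0.TS handling to the
	 * hypervisor; HVM/KVM guests normally keep it in hardware.
	 */
	return xen_pv_domain();
}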

Jan Beulich <JBeulich@...e.com> wrote:

>In virtualized environments, the CR0.TS management needed here can be a
>lot slower than the original authors of this code anticipated. In such
>cases, forcing the use of the SSE- (or MMX-) based implementations is
>therefore not desirable; actual measurements should always be done
>instead.
>
>For consistency, pull into the shared (32- and 64-bit) header not only
>the inclusion of the generic code, but also that of the AVX variants.
>
>Signed-off-by: Jan Beulich <jbeulich@...e.com>
>Cc: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
>
>---
> arch/x86/include/asm/xor.h    |    8 +++++++-
> arch/x86/include/asm/xor_32.h |   22 ++++++++++------------
> arch/x86/include/asm/xor_64.h |   10 ++++++----
> 3 files changed, 23 insertions(+), 17 deletions(-)
>
>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor.h
>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor.h
>@@ -487,6 +487,12 @@ static struct xor_block_template xor_blo
> 
> #undef XOR_CONSTANT_CONSTRAINT
> 
>+/* Also try the AVX routines */
>+#include <asm/xor_avx.h>
>+
>+/* Also try the generic routines. */
>+#include <asm-generic/xor.h>
>+
> #ifdef CONFIG_X86_32
> # include <asm/xor_32.h>
> #else
>@@ -494,6 +500,6 @@ static struct xor_block_template xor_blo
> #endif
> 
> #define XOR_SELECT_TEMPLATE(FASTEST) \
>-	AVX_SELECT(FASTEST)
>+	(cpu_has_hypervisor ? (FASTEST) : AVX_SELECT(FASTEST))
> 
> #endif /* _ASM_X86_XOR_H */
>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_32.h
>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_32.h
>@@ -537,12 +537,6 @@ static struct xor_block_template xor_blo
> 	.do_5 = xor_sse_5,
> };
> 
>-/* Also try the AVX routines */
>-#include <asm/xor_avx.h>
>-
>-/* Also try the generic routines.  */
>-#include <asm-generic/xor.h>
>-
> /* We force the use of the SSE xor block because it can write around L2.
>    We may also be able to load into the L1 only depending on how the cpu
>    deals with a load to a line that is being prefetched.  */
>@@ -553,15 +547,19 @@ do {							\
> 	if (cpu_has_xmm) {				\
> 		xor_speed(&xor_block_pIII_sse);		\
> 		xor_speed(&xor_block_sse_pf64);		\
>-	} else if (cpu_has_mmx) {			\
>+		if (!cpu_has_hypervisor)		\
>+			break;				\
>+	}						\
>+	if (cpu_has_mmx) {				\
> 		xor_speed(&xor_block_pII_mmx);		\
> 		xor_speed(&xor_block_p5_mmx);		\
>-	} else {					\
>-		xor_speed(&xor_block_8regs);		\
>-		xor_speed(&xor_block_8regs_p);		\
>-		xor_speed(&xor_block_32regs);		\
>-		xor_speed(&xor_block_32regs_p);		\
>+		if (!cpu_has_hypervisor)		\
>+			break;				\
> 	}						\
>+	xor_speed(&xor_block_8regs);			\
>+	xor_speed(&xor_block_8regs_p);			\
>+	xor_speed(&xor_block_32regs);			\
>+	xor_speed(&xor_block_32regs_p);			\
> } while (0)
> 
> #endif /* _ASM_X86_XOR_32_H */
>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_64.h
>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_64.h
>@@ -9,10 +9,6 @@ static struct xor_block_template xor_blo
> 	.do_5 = xor_sse_5,
> };
> 
>-
>-/* Also try the AVX routines */
>-#include <asm/xor_avx.h>
>-
> /* We force the use of the SSE xor block because it can write around L2.
>    We may also be able to load into the L1 only depending on how the cpu
>    deals with a load to a line that is being prefetched.  */
>@@ -22,6 +18,12 @@ do {						\
> 	AVX_XOR_SPEED;				\
> 	xor_speed(&xor_block_sse_pf64);		\
> 	xor_speed(&xor_block_sse);		\
>+	if (cpu_has_hypervisor) {		\
>+		xor_speed(&xor_block_8regs);	\
>+		xor_speed(&xor_block_8regs_p);	\
>+		xor_speed(&xor_block_32regs);	\
>+		xor_speed(&xor_block_32regs_p);	\
>+	}					\
> } while (0)
> 
> #endif /* _ASM_X86_XOR_64_H */
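
For reference, the CR0.TS cost the commit message refers to comes from
the way the SSE xor routines bracket their work.  A simplified sketch of
the pattern (an approximation of the XMMS_SAVE/XMMS_RESTORE macros in
xor_32.h, not the kernel's exact code; the real macros also save and
restore the XMM registers):

#include <linux/preempt.h>	/* preempt_disable()/preempt_enable() */
#include <asm/special_insns.h>	/* read_cr0(), write_cr0(), clts() */

/* Sketch only: every call pays privileged CR0 accesses, which are cheap
 * on bare metal but may each trap to the hypervisor in a PV guest. */
static void xor_sse_2_sketch(unsigned long bytes,
			     unsigned long *p1, unsigned long *p2)
{
	unsigned long cr0;

	preempt_disable();
	cr0 = read_cr0();	/* save CR0 */
	clts();			/* clear CR0.TS so SSE insns don't fault */

	/* ... SSE loads, xors and stores over the two buffers ... */

	write_cr0(cr0);		/* restore CR0.TS */
	preempt_enable();
}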

-- 
Sent from my mobile phone. Please excuse brevity and lack of formatting.
