Date:	Mon, 05 Nov 2012 09:10:24 +0000
From:	"Jan Beulich" <JBeulich@...e.com>
To:	"H. Peter Anvin" <hpa@...or.com>
Cc:	<mingo@...e.hu>, <tglx@...utronix.de>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@...cle.com>,
	<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 3/3, v2] x86/xor: make virtualization friendly

>>> On 02.11.12 at 18:30, "H. Peter Anvin" <hpa@...or.com> wrote:
> Aren't we actually talking just about PV here?
> 
> If so the test is wrong.

No - this can equally affect "fully" virtualized guests (where the
CR0.TS accesses can involve VMEXITs).
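
(For illustration, a rough sketch of the per-call CR0.TS dance done by
the 32-bit SSE xor routines - simplified from the XMMS_SAVE/XMMS_RESTORE
macros, with the XMM register save/restore abbreviated into made-up
helpers; the literal kernel code differs:

	unsigned long cr0;
	char xmm_save[4 * 16];		/* room for %xmm0..%xmm3 */

	preempt_disable();
	cr0 = read_cr0();		/* can trap to the hypervisor */
	clts();				/* clear CR0.TS - can trap too */
	save_xmm_regs(xmm_save);	/* hypothetical helper: movups of %xmm0..%xmm3 */

	/* ... SSE xor of the source/destination blocks ... */

	restore_xmm_regs(xmm_save);	/* hypothetical helper */
	write_cr0(cr0);			/* restore CR0.TS - can trap again */
	preempt_enable();

Each of those CR0 accesses is cheap on bare metal, but in a guest every
one of them may be intercepted, so paying this cost around every xor
request makes the compile-time preference for the SSE/MMX routines a
bad bet.)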

Jan

> Jan Beulich <JBeulich@...e.com> wrote:
> 
>>In virtualized environments, the CR0.TS management needed here can be a
>>lot slower than anticipated by the original authors of this code, which
>>particularly means that in such cases forcing the use of SSE- (or MMX-)
>>based implementations is not desirable - actual measurements should
>>always be done in that case.
>>
>>For consistency, pull into the shared (32- and 64-bit) header not only
>>the inclusion of the generic code, but also that of the AVX variants.
>>
>>Signed-off-by: Jan Beulich <jbeulich@...e.com>
>>Cc: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
>>
>>---
>> arch/x86/include/asm/xor.h    |    8 +++++++-
>> arch/x86/include/asm/xor_32.h |   22 ++++++++++------------
>> arch/x86/include/asm/xor_64.h |   10 ++++++----
>> 3 files changed, 23 insertions(+), 17 deletions(-)
>>
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor.h
>>@@ -487,6 +487,12 @@ static struct xor_block_template xor_blo
>> 
>> #undef XOR_CONSTANT_CONSTRAINT
>> 
>>+/* Also try the AVX routines */
>>+#include <asm/xor_avx.h>
>>+
>>+/* Also try the generic routines. */
>>+#include <asm-generic/xor.h>
>>+
>> #ifdef CONFIG_X86_32
>> # include <asm/xor_32.h>
>> #else
>>@@ -494,6 +500,6 @@ static struct xor_block_template xor_blo
>> #endif
>> 
>> #define XOR_SELECT_TEMPLATE(FASTEST) \
>>-	AVX_SELECT(FASTEST)
>>+	(cpu_has_hypervisor ? (FASTEST) : AVX_SELECT(FASTEST))
>> 
>> #endif /* _ASM_X86_XOR_H */
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_32.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_32.h
>>@@ -537,12 +537,6 @@ static struct xor_block_template xor_blo
>> 	.do_5 = xor_sse_5,
>> };
>> 
>>-/* Also try the AVX routines */
>>-#include <asm/xor_avx.h>
>>-
>>-/* Also try the generic routines.  */
>>-#include <asm-generic/xor.h>
>>-
>> /* We force the use of the SSE xor block because it can write around L2.
>>    We may also be able to load into the L1 only depending on how the cpu
>>    deals with a load to a line that is being prefetched.  */
>>@@ -553,15 +547,19 @@ do {							\
>> 	if (cpu_has_xmm) {				\
>> 		xor_speed(&xor_block_pIII_sse);		\
>> 		xor_speed(&xor_block_sse_pf64);		\
>>-	} else if (cpu_has_mmx) {			\
>>+		if (!cpu_has_hypervisor)		\
>>+			break;				\
>>+	}						\
>>+	if (cpu_has_mmx) {				\
>> 		xor_speed(&xor_block_pII_mmx);		\
>> 		xor_speed(&xor_block_p5_mmx);		\
>>-	} else {					\
>>-		xor_speed(&xor_block_8regs);		\
>>-		xor_speed(&xor_block_8regs_p);		\
>>-		xor_speed(&xor_block_32regs);		\
>>-		xor_speed(&xor_block_32regs_p);		\
>>+		if (!cpu_has_hypervisor)		\
>>+			break;				\
>> 	}						\
>>+	xor_speed(&xor_block_8regs);			\
>>+	xor_speed(&xor_block_8regs_p);			\
>>+	xor_speed(&xor_block_32regs);			\
>>+	xor_speed(&xor_block_32regs_p);			\
>> } while (0)
>> 
>> #endif /* _ASM_X86_XOR_32_H */
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_64.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_64.h
>>@@ -9,10 +9,6 @@ static struct xor_block_template xor_blo
>> 	.do_5 = xor_sse_5,
>> };
>> 
>>-
>>-/* Also try the AVX routines */
>>-#include <asm/xor_avx.h>
>>-
>> /* We force the use of the SSE xor block because it can write around L2.
>>    We may also be able to load into the L1 only depending on how the cpu
>>    deals with a load to a line that is being prefetched.  */
>>@@ -22,6 +18,12 @@ do {						\
>> 	AVX_XOR_SPEED;				\
>> 	xor_speed(&xor_block_sse_pf64);		\
>> 	xor_speed(&xor_block_sse);		\
>>+	if (cpu_has_hypervisor) {		\
>>+		xor_speed(&xor_block_8regs);	\
>>+		xor_speed(&xor_block_8regs_p);	\
>>+		xor_speed(&xor_block_32regs);	\
>>+		xor_speed(&xor_block_32regs_p);	\
>>+	}					\
>> } while (0)
>> 
>> #endif /* _ASM_X86_XOR_64_H */
> 
> -- 
> Sent from my mobile phone. Please excuse brevity and lack of formatting.


