Message-Id: <5097909002000078000A64C2@nat28.tlf.novell.com>
Date: Mon, 05 Nov 2012 09:10:24 +0000
From: "Jan Beulich" <JBeulich@...e.com>
To: "H. Peter Anvin" <hpa@...or.com>
Cc: <mingo@...e.hu>, <tglx@...utronix.de>,
"Konrad Rzeszutek Wilk" <konrad.wilk@...cle.com>,
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 3/3, v2] x86/xor: make virtualization friendly
>>> On 02.11.12 at 18:30, "H. Peter Anvin" <hpa@...or.com> wrote:
> Aren't we actually talking just about PV here?
>
> If so the test is wrong.
No - this can equally affect "fully" virtualized guests (where the
CR0.TS accesses can involve VMEXITs).
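
To illustrate: a paraphrased sketch of the CR0.TS dance the SSE xor
paths perform (stand-in function name; not the literal XMMS_SAVE/
XMMS_RESTORE macros from xor_32.h):

#include <linux/preempt.h>
#include <asm/special_insns.h>	/* read_cr0(), write_cr0(), clts() */

static void sse_xor_ts_dance_sketch(void)
{
	unsigned long cr0;

	preempt_disable();
	cr0 = read_cr0();	/* can trap to the hypervisor (VMEXIT) */
	clts();			/* clear CR0.TS so SSE use doesn't #NM-fault;
				   can trap too */

	/* ... save XMM registers, run the SSE xor loop, restore XMM ... */

	write_cr0(cr0);		/* restore TS - another potential VMEXIT */
	preempt_enable();
}

On HVM each such CR0 access can be a full VMEXIT/VMENTER round trip,
which can dwarf the cost of the xor work itself for small buffers.
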
Jan
> Jan Beulich <JBeulich@...e.com> wrote:
>
>>In virtualized environments, the CR0.TS management needed here can be a
>>lot slower than the original authors of this code anticipated. In
>>particular, this means that forcing the use of SSE- (or MMX-) based
>>implementations is undesirable in such cases - actual measurements
>>should always be done instead.
>>
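For reference: the "actual measurements" are the xor_speed()
calibration the kernel already runs when the xor code initializes -
calibrate_xor_blocks() in crypto/xor.c times every registered template
and keeps the fastest. A rough userspace analogue of timing one
template (made-up buffer size and iteration count):

#include <stdio.h>
#include <time.h>

#define BUF_SIZE 4096
#define ITERS    100000

static unsigned char a[BUF_SIZE], b[BUF_SIZE];

/* Trivial stand-in for one xor "template", cf. xor_8regs_2(). */
static void xor_bytes(unsigned long n, unsigned char *p1,
		      const unsigned char *p2)
{
	while (n--)
		*p1++ ^= *p2++;
}

int main(void)
{
	struct timespec t0, t1;
	double secs;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < ITERS; i++)
		xor_bytes(BUF_SIZE, a, b);
	clock_gettime(CLOCK_MONOTONIC, &t1);

	secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("%.1f MB/s\n", BUF_SIZE * (double)ITERS / secs / 1e6);
	return 0;
}
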
>>For consistency, pull into the shared (32- and 64-bit) header not only
>>the inclusion of the generic code, but also that of the AVX variants.
>>
>>Signed-off-by: Jan Beulich <jbeulich@...e.com>
>>Cc: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
>>
>>---
>> arch/x86/include/asm/xor.h | 8 +++++++-
>> arch/x86/include/asm/xor_32.h | 22 ++++++++++------------
>> arch/x86/include/asm/xor_64.h | 10 ++++++----
>> 3 files changed, 23 insertions(+), 17 deletions(-)
>>
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor.h
>>@@ -487,6 +487,12 @@ static struct xor_block_template xor_blo
>>
>> #undef XOR_CONSTANT_CONSTRAINT
>>
>>+/* Also try the AVX routines */
>>+#include <asm/xor_avx.h>
>>+
>>+/* Also try the generic routines. */
>>+#include <asm-generic/xor.h>
>>+
>> #ifdef CONFIG_X86_32
>> # include <asm/xor_32.h>
>> #else
>>@@ -494,6 +500,6 @@ static struct xor_block_template xor_blo
>> #endif
>>
>> #define XOR_SELECT_TEMPLATE(FASTEST) \
>>- AVX_SELECT(FASTEST)
>>+ (cpu_has_hypervisor ? (FASTEST) : AVX_SELECT(FASTEST))
>>
>> #endif /* _ASM_X86_XOR_H */
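
That is, with X86_FEATURE_HYPERVISOR set the template that actually
measured fastest is kept, rather than being unconditionally overridden
by an AVX one. Written out as a function (a sketch of the macro's
effect, not literal kernel code):

static struct xor_block_template *
select_template(struct xor_block_template *fastest)
{
	if (cpu_has_hypervisor)
		return fastest;		/* trust the benchmark results */
	return AVX_SELECT(fastest);	/* bare metal: prefer AVX when usable */
}
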
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_32.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_32.h
>>@@ -537,12 +537,6 @@ static struct xor_block_template xor_blo
>> .do_5 = xor_sse_5,
>> };
>>
>>-/* Also try the AVX routines */
>>-#include <asm/xor_avx.h>
>>-
>>-/* Also try the generic routines. */
>>-#include <asm-generic/xor.h>
>>-
>> /* We force the use of the SSE xor block because it can write around L2.
>>    We may also be able to load into the L1 only depending on how the cpu
>>    deals with a load to a line that is being prefetched. */
>>@@ -553,15 +547,19 @@ do { \
>> if (cpu_has_xmm) { \
>> xor_speed(&xor_block_pIII_sse); \
>> xor_speed(&xor_block_sse_pf64); \
>>- } else if (cpu_has_mmx) { \
>>+ if (!cpu_has_hypervisor) \
>>+ break; \
>>+ } \
>>+ if (cpu_has_mmx) { \
>> xor_speed(&xor_block_pII_mmx); \
>> xor_speed(&xor_block_p5_mmx); \
>>- } else { \
>>- xor_speed(&xor_block_8regs); \
>>- xor_speed(&xor_block_8regs_p); \
>>- xor_speed(&xor_block_32regs); \
>>- xor_speed(&xor_block_32regs_p); \
>>+ if (!cpu_has_hypervisor) \
>>+ break; \
>> } \
>>+ xor_speed(&xor_block_8regs); \
>>+ xor_speed(&xor_block_8regs_p); \
>>+ xor_speed(&xor_block_32regs); \
>>+ xor_speed(&xor_block_32regs_p); \
>> } while (0)
>>
>> #endif /* _ASM_X86_XOR_32_H */
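
Note that the early exits above work because XOR_TRY_TEMPLATES is a
do { ... } while (0) block, so `break` leaves the whole macro body. A
self-contained illustration of the pattern (stand-in names):

#include <stdbool.h>
#include <stdio.h>

static bool have_sse = true, virtualized = true;

/* Same shape as XOR_TRY_TEMPLATES: do-while(0) turns `break` into an
 * early exit from the macro body. */
#define TRY_TEMPLATES()						\
do {								\
	if (have_sse) {						\
		puts("benchmark SSE templates");		\
		if (!virtualized)				\
			break;	/* bare metal: stop here */	\
	}							\
	puts("benchmark generic integer templates");		\
} while (0)

int main(void)
{
	TRY_TEMPLATES();	/* virtualized: prints both lines */
	return 0;
}
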
>>--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_64.h
>>+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_64.h
>>@@ -9,10 +9,6 @@ static struct xor_block_template xor_blo
>> .do_5 = xor_sse_5,
>> };
>>
>>-
>>-/* Also try the AVX routines */
>>-#include <asm/xor_avx.h>
>>-
>> /* We force the use of the SSE xor block because it can write around L2.
>>    We may also be able to load into the L1 only depending on how the cpu
>>    deals with a load to a line that is being prefetched. */
>>@@ -22,6 +18,12 @@ do { \
>> AVX_XOR_SPEED; \
>> xor_speed(&xor_block_sse_pf64); \
>> xor_speed(&xor_block_sse); \
>>+ if (cpu_has_hypervisor) { \
>>+ xor_speed(&xor_block_8regs); \
>>+ xor_speed(&xor_block_8regs_p); \
>>+ xor_speed(&xor_block_32regs); \
>>+ xor_speed(&xor_block_32regs_p); \
>>+ } \
>> } while (0)
>>
>> #endif /* _ASM_X86_XOR_64_H */
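
For completeness: cpu_has_hypervisor tests X86_FEATURE_HYPERVISOR,
which mirrors CPUID leaf 1's ECX bit 31 - the bit hypervisors set to
announce their presence. A userspace illustration using GCC/Clang's
<cpuid.h> helper:

#include <stdbool.h>
#include <stdio.h>
#include <cpuid.h>

/* CPUID.1:ECX bit 31 is the "hypervisor present" bit; the kernel
 * caches it as X86_FEATURE_HYPERVISOR. */
static bool running_under_hypervisor(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	return (ecx >> 31) & 1;
}

int main(void)
{
	printf("hypervisor: %s\n",
	       running_under_hypervisor() ? "yes" : "no");
	return 0;
}
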
>
> --
> Sent from my mobile phone. Please excuse brevity and lack of formatting.