Message-ID: <e500fe9a-5c62-8304-92f1-18a2ee185fe4@amd.com>
Date: Fri, 24 Apr 2020 14:04:15 -0500
From: Tom Lendacky <thomas.lendacky@....com>
To: Balbir Singh <sblbir@...zon.com>, tglx@...utronix.de,
linux-kernel@...r.kernel.org
Cc: jpoimboe@...hat.com, tony.luck@...el.com, keescook@...omium.org,
benh@...nel.crashing.org, x86@...nel.org, dave.hansen@...el.com
Subject: Re: [PATCH v4 2/6] arch/x86/kvm: Refactor tlbflush and l1d flush
On 4/23/20 9:01 AM, Balbir Singh wrote:
> Refactor the existing assembly bits into smaller helper functions
> and also abstract L1D_FLUSH into a helper function. Use these
> functions in kvm for L1D flushing.
>
> Reviewed-by: Kees Cook <keescook@...omium.org>
> Signed-off-by: Balbir Singh <sblbir@...zon.com>
> ---
> arch/x86/include/asm/cacheflush.h | 3 ++
> arch/x86/kernel/l1d_flush.c | 54 +++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/vmx.c | 29 ++---------------
> 3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
> index bac56fcd9790..21cc3b28fa63 100644
> --- a/arch/x86/include/asm/cacheflush.h
> +++ b/arch/x86/include/asm/cacheflush.h
> @@ -8,7 +8,10 @@
>
> #define L1D_CACHE_ORDER 4
> void clflush_cache_range(void *addr, unsigned int size);
> +void l1d_flush_populate_tlb(void *l1d_flush_pages);
> void *l1d_flush_alloc_pages(void);
> void l1d_flush_cleanup_pages(void *l1d_flush_pages);
> +void l1d_flush_sw(void *l1d_flush_pages);
> +int l1d_flush_hw(void);
>
> #endif /* _ASM_X86_CACHEFLUSH_H */
> diff --git a/arch/x86/kernel/l1d_flush.c b/arch/x86/kernel/l1d_flush.c
> index d605878c8f28..5871794f890d 100644
> --- a/arch/x86/kernel/l1d_flush.c
> +++ b/arch/x86/kernel/l1d_flush.c
> @@ -34,3 +34,57 @@ void l1d_flush_cleanup_pages(void *l1d_flush_pages)
> free_pages((unsigned long)l1d_flush_pages, L1D_CACHE_ORDER);
> }
> EXPORT_SYMBOL_GPL(l1d_flush_cleanup_pages);
> +
> +/*
> + * Not all users of the L1D flush would want to populate the TLB first;
> + * split out the function so that callers can optionally flush the L1D
> + * cache via SW without prefetching the TLB.
> + */
> +void l1d_flush_populate_tlb(void *l1d_flush_pages)
> +{
> + int size = PAGE_SIZE << L1D_CACHE_ORDER;
> +
> + asm volatile(
> + /* First ensure the pages are in the TLB */
> + "xorl %%eax, %%eax\n"
> + ".Lpopulate_tlb:\n\t"
> + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
> + "addl $4096, %%eax\n\t"
> + "cmpl %%eax, %[size]\n\t"
> + "jne .Lpopulate_tlb\n\t"
> + "xorl %%eax, %%eax\n\t"
> + "cpuid\n\t"
> + :: [flush_pages] "r" (l1d_flush_pages),
> + [size] "r" (size)
> + : "eax", "ebx", "ecx", "edx");
> +}
> +EXPORT_SYMBOL_GPL(l1d_flush_populate_tlb);
> +
> +int l1d_flush_hw(void)
> +{
> + if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
> + wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
> + return 0;
> + }
> + return -ENOTSUPP;
> +}
> +EXPORT_SYMBOL_GPL(l1d_flush_hw);
> +
> +void l1d_flush_sw(void *l1d_flush_pages)
> +{
> + int size = PAGE_SIZE << L1D_CACHE_ORDER;
> +
> + asm volatile(
> + /* Fill the cache */
> + "xorl %%eax, %%eax\n"
> + ".Lfill_cache:\n"
> + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
> + "addl $64, %%eax\n\t"
> + "cmpl %%eax, %[size]\n\t"
> + "jne .Lfill_cache\n\t"
> + "lfence\n"
> + :: [flush_pages] "r" (l1d_flush_pages),
> + [size] "r" (size)
> + : "eax", "ecx");
> +}
If the answer to my previous question in patch 1/6 about the allocation
being twice the size of the cache is yes, then could you allocate the
flush pages the same size as the cache and just write them twice? Wouldn't
that accomplish the same goal and provide a performance improvement, since
the L1D entries for the flush pages would (mostly) already be present?
Also, can't you unroll this loop a bit and operate on multiple entries per
iteration, reducing the overall looping compares and jumps as an
optimization?
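
As a rough illustration of the unrolling idea (an untested sketch only; the
unroll factor of four and the helper name below are mine, not part of the
patch), the fill loop could touch four cache lines per iteration:

/*
 * Hypothetical, untested sketch: unroll the fill loop by four cache
 * lines per iteration to reduce the number of compares and branches.
 * Assumes size is a multiple of 256 bytes, which holds for
 * PAGE_SIZE << L1D_CACHE_ORDER.
 */
void l1d_flush_sw_unrolled(void *l1d_flush_pages)
{
	int size = PAGE_SIZE << L1D_CACHE_ORDER;

	asm volatile(
		/* Fill the cache, four lines at a time */
		"xorl %%eax, %%eax\n"
		".Lfill_cache_unrolled:\n\t"
		"movzbl 0x00(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"movzbl 0x40(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"movzbl 0x80(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"movzbl 0xc0(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"addl $256, %%eax\n\t"
		"cmpl %%eax, %[size]\n\t"
		"jne .Lfill_cache_unrolled\n\t"
		"lfence\n"
		:: [flush_pages] "r" (l1d_flush_pages),
		   [size] "r" (size)
		: "eax", "ecx");
}

The same treatment could be applied to the TLB-populate loop, although with
a 4096-byte stride that loop only runs 16 times to begin with.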
Thanks,
Tom
> +EXPORT_SYMBOL_GPL(l1d_flush_sw);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 225aa8219bac..786d1615a09f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5983,8 +5983,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
> */
> static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
> {
> - int size = PAGE_SIZE << L1D_CACHE_ORDER;
> -
> /*
> * This code is only executed when the flush mode is 'cond' or
> * 'always'
> @@ -6013,32 +6011,11 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
>
> vcpu->stat.l1d_flush++;
>
> - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
> - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
> + if (!l1d_flush_hw())
> return;
> - }
>
> - asm volatile(
> - /* First ensure the pages are in the TLB */
> - "xorl %%eax, %%eax\n"
> - ".Lpopulate_tlb:\n\t"
> - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
> - "addl $4096, %%eax\n\t"
> - "cmpl %%eax, %[size]\n\t"
> - "jne .Lpopulate_tlb\n\t"
> - "xorl %%eax, %%eax\n\t"
> - "cpuid\n\t"
> - /* Now fill the cache */
> - "xorl %%eax, %%eax\n"
> - ".Lfill_cache:\n"
> - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
> - "addl $64, %%eax\n\t"
> - "cmpl %%eax, %[size]\n\t"
> - "jne .Lfill_cache\n\t"
> - "lfence\n"
> - :: [flush_pages] "r" (vmx_l1d_flush_pages),
> - [size] "r" (size)
> - : "eax", "ebx", "ecx", "edx");
> + l1d_flush_populate_tlb(vmx_l1d_flush_pages);
> + l1d_flush_sw(vmx_l1d_flush_pages);
> }
>
> static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>