linux-kernel - Re: [PATCH] x86: Align TLB invalidation info

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <BD28D2CF-CAD7-45A1-87D0-DE38F7B0EFE5@amacapital.net>
Date:   Wed, 31 Jan 2018 12:24:44 -0800
From:   Andy Lutomirski <luto@...capital.net>
To:     Nadav Amit <namit@...are.com>
Cc:     x86@...nel.org, Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>, linux-kernel@...r.kernel.org,
        Peter Zijlstra <peterz@...radead.org>,
        Nadav Amit <nadav.amit@...il.com>,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>
Subject: Re: [PATCH] x86: Align TLB invalidation info



> On Jan 31, 2018, at 12:11 PM, Nadav Amit <namit@...are.com> wrote:
> 
> The TLB invalidation info is allocated on the stack, which might cause
> it not to be cache-line aligned. Since this information may be
> transferred to different cores for TLB shootdown, this might result in
> additional cache-line bouncing between the cores.
> 
> GCC provides a way to deal with this: __builtin_alloca_with_align().
> Use it to avoid the extra cache-line bouncing.
> 

Eww.  How about __aligned?


> Signed-off-by: Nadav Amit <namit@...are.com>
> 
> Cc: Andy Lutomirski <luto@...nel.org>
> Cc: Dave Hansen <dave.hansen@...ux.intel.com>
> ---
> arch/x86/mm/tlb.c              | 21 +++++++++++----------
> include/linux/compiler-gcc.h   |  5 +++++
> include/linux/compiler_types.h |  4 ++++
> 3 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 5bfe61a5e8e3..bab7bb5d982f 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -574,37 +574,38 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
> void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
>                unsigned long end, unsigned long vmflag)
> {
> +    struct flush_tlb_info *info;
>    int cpu;
> 
> -    struct flush_tlb_info info = {
> -        .mm = mm,
> -    };
> +    info = __alloca_with_align(sizeof(*info),
> +                   SMP_CACHE_BYTES * BITS_PER_BYTE);
> +    info->mm = mm;
> 
>    cpu = get_cpu();
> 
>    /* This is also a barrier that synchronizes with switch_mm(). */
> -    info.new_tlb_gen = inc_mm_tlb_gen(mm);
> +    info->new_tlb_gen = inc_mm_tlb_gen(mm);
> 
>    /* Should we flush just the requested range? */
>    if ((end != TLB_FLUSH_ALL) &&
>        !(vmflag & VM_HUGETLB) &&
>        ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
> -        info.start = start;
> -        info.end = end;
> +        info->start = start;
> +        info->end = end;
>    } else {
> -        info.start = 0UL;
> -        info.end = TLB_FLUSH_ALL;
> +        info->start = 0UL;
> +        info->end = TLB_FLUSH_ALL;
>    }
> 
>    if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
>        VM_WARN_ON(irqs_disabled());
>        local_irq_disable();
> -        flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
> +        flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
>        local_irq_enable();
>    }
> 
>    if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
> -        flush_tlb_others(mm_cpumask(mm), &info);
> +        flush_tlb_others(mm_cpumask(mm), info);
> 
>    put_cpu();
> }
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index 631354acfa72..aea9a2e69417 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -314,6 +314,11 @@
> #define __designated_init __attribute__((designated_init))
> #endif
> 
> +#if GCC_VERSION >= 60100
> +#define __alloca_with_align(size, alignment)                \
> +    __builtin_alloca_with_align(size, alignment)
> +#endif
> +
> #endif    /* gcc version >= 40000 specific checks */
> 
> #if !defined(__noclone)
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 6b79a9bba9a7..c71297d95c74 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -271,4 +271,8 @@ struct ftrace_likely_data {
> # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
> #endif
> 
> +#ifndef __alloca_with_align
> +#define __alloca_with_align(size, alignment) __builtin_alloca(size)
> +#endif
> +
> #endif /* __LINUX_COMPILER_TYPES_H */
> -- 
> 2.14.1
>