lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180131201118.1694-1-namit@vmware.com>
Date:   Wed, 31 Jan 2018 12:11:18 -0800
From:   Nadav Amit <namit@...are.com>
To:     <x86@...nel.org>
CC:     Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>,
        "H. Peter Anvin" <hpa@...or.com>, <linux-kernel@...r.kernel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Nadav Amit <nadav.amit@...il.com>,
        Nadav Amit <namit@...are.com>,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>
Subject: [PATCH] x86: Align TLB invalidation info

The TLB invalidation info is allocated on the stack, which might cause
it to be unaligned. Since this information may be transferred to
different cores for TLB shootdown, this might result in an additional
cache-line bouncing between the cores.

GCC provides a way to deal with it by using
__builtin_alloca_with_align(). Use it to avoid the bouncing cache lines.

Signed-off-by: Nadav Amit <namit@...are.com>

Cc: Andy Lutomirski <luto@...nel.org>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
---
 arch/x86/mm/tlb.c              | 21 +++++++++++----------
 include/linux/compiler-gcc.h   |  5 +++++
 include/linux/compiler_types.h |  4 ++++
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5bfe61a5e8e3..bab7bb5d982f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -574,37 +574,38 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
+	struct flush_tlb_info *info;
 	int cpu;
 
-	struct flush_tlb_info info = {
-		.mm = mm,
-	};
+	info = __alloca_with_align(sizeof(*info),
+				   SMP_CACHE_BYTES * BITS_PER_BYTE);
+	info->mm = mm;
 
 	cpu = get_cpu();
 
 	/* This is also a barrier that synchronizes with switch_mm(). */
-	info.new_tlb_gen = inc_mm_tlb_gen(mm);
+	info->new_tlb_gen = inc_mm_tlb_gen(mm);
 
 	/* Should we flush just the requested range? */
 	if ((end != TLB_FLUSH_ALL) &&
 	    !(vmflag & VM_HUGETLB) &&
 	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
-		info.start = start;
-		info.end = end;
+		info->start = start;
+		info->end = end;
 	} else {
-		info.start = 0UL;
-		info.end = TLB_FLUSH_ALL;
+		info->start = 0UL;
+		info->end = TLB_FLUSH_ALL;
 	}
 
 	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
 		VM_WARN_ON(irqs_disabled());
 		local_irq_disable();
-		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+		flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
 		local_irq_enable();
 	}
 
 	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), &info);
+		flush_tlb_others(mm_cpumask(mm), info);
 
 	put_cpu();
 }
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 631354acfa72..aea9a2e69417 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -314,6 +314,11 @@
 #define __designated_init __attribute__((designated_init))
 #endif
 
+#if GCC_VERSION >= 60100
+#define __alloca_with_align(size, alignment)				\
+	__builtin_alloca_with_align(size, alignment)
+#endif
+
 #endif	/* gcc version >= 40000 specific checks */
 
 #if !defined(__noclone)
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9bba9a7..c71297d95c74 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -271,4 +271,8 @@ struct ftrace_likely_data {
 # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 #endif
 
+#ifndef __alloca_with_align
+#define __alloca_with_align(size, alignment) __builtin_alloca(size)
+#endif
+
 #endif /* __LINUX_COMPILER_TYPES_H */
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ