lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220902130947.190618587@infradead.org>
Date:   Fri, 02 Sep 2022 15:06:33 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     linux-kernel@...r.kernel.org, peterz@...radead.org, x86@...nel.org,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        Josh Poimboeuf <jpoimboe@...nel.org>,
        Andrew Cooper <Andrew.Cooper3@...rix.com>,
        Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
        Johannes Wikner <kwikner@...z.ch>,
        Alyssa Milburn <alyssa.milburn@...ux.intel.com>,
        Jann Horn <jannh@...gle.com>, "H.J. Lu" <hjl.tools@...il.com>,
        Joao Moreira <joao.moreira@...el.com>,
        Joseph Nuzman <joseph.nuzman@...el.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Juergen Gross <jgross@...e.com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        K Prateek Nayak <kprateek.nayak@....com>,
        Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH v2 08/59] x86/build: Ensure proper function alignment

From: Thomas Gleixner <tglx@...utronix.de>

The Intel 64 and IA-32 Architectures Optimization Reference Manual
explains that functions should be aligned to 16-byte boundaries because,
on many (Intel) uarchs, the I-fetch width is 16 bytes. The AMD Software
Optimization Guide (for recent chips) mentions a 32-byte I-fetch window
but a 16-byte decode window.

Follow this advice and align functions to 16 bytes to optimize
instruction delivery to decode and reduce front-end bottlenecks.

Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
 arch/x86/Kconfig.cpu              |    6 ++++++
 arch/x86/Makefile                 |    4 ++++
 arch/x86/include/asm/linkage.h    |    7 ++++---
 include/asm-generic/vmlinux.lds.h |    7 ++++++-
 4 files changed, 20 insertions(+), 4 deletions(-)

--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -517,3 +517,9 @@ config CPU_SUP_VORTEX_32
 	  makes the kernel a tiny bit smaller.
 
 	  If unsure, say N.
+
+# Defined here so it is defined for UM too
+config FUNCTION_ALIGNMENT
+	int
+	default 16 if X86_64 || X86_ALIGNMENT_16
+	default 8
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -84,6 +84,10 @@ else
 KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 endif
 
+ifneq ($(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B),y)
+KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
+endif
+
 ifeq ($(CONFIG_X86_32),y)
         BITS := 32
         UTS_MACHINE := i386
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -14,9 +14,10 @@
 
 #ifdef __ASSEMBLY__
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
-#define __ALIGN		.p2align 4, 0x90
-#define __ALIGN_STR	__stringify(__ALIGN)
+#if CONFIG_FUNCTION_ALIGNMENT == 16
+#define __ALIGN			.p2align 4, 0x90
+#define __ALIGN_STR		__stringify(__ALIGN)
+#define FUNCTION_ALIGNMENT	16
 #endif
 
 #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -82,7 +82,12 @@
 #endif
 
 /* Align . to a 8 byte boundary equals to maximum function alignment. */
-#define ALIGN_FUNCTION()  . = ALIGN(8)
+#ifndef CONFIG_FUNCTION_ALIGNMENT
+#define __FUNCTION_ALIGNMENT	8
+#else
+#define __FUNCTION_ALIGNMENT	CONFIG_FUNCTION_ALIGNMENT
+#endif
+#define ALIGN_FUNCTION()  . = ALIGN(__FUNCTION_ALIGNMENT)
 
 /*
  * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ