lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190704204737.5267-1-adobriyan@gmail.com>
Date:   Thu,  4 Jul 2019 23:47:33 +0300
From:   Alexey Dobriyan <adobriyan@...il.com>
To:     tglx@...utronix.de, mingo@...hat.com, bp@...en8.de, hpa@...or.com
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org, adobriyan@...il.com
Subject: [PATCH 1/5] x86_64: -march=native support

I'm tired of rebasing it, so...

"-march=native" has been available in userspace for a long time and is
trivial to enable in Gentoo:

	$ grep -e ^CFLAGS /etc/portage/make.conf
	CFLAGS="-march=native -O2 -pipe"

Patchset enables kernel compile with "-march=native" and does additional
optimizations based on CPU detection. Unfortunately most of the fun is in
SSE2/AVX2 instructions and kernel can't use those. But I have ideas for
at least BMI2.

This is intended to be an alternative to old school MCORE2 options.
Gentoo also ships a patch unrolling all those individual -march= options
into kernel config options. This patch should deprecate it.

See the link for more information:

	https://www.shlomifish.org/humour/by-others/funroll-loops/Gentoo-is-Rice.html

Patch adds:
* -mgeneral-regs-only
	with -march=native all those shiny AVX42-666 instructions
	may suddenly become available

* small compile time partial CPUID detection,
* detect L1 cache shift at compile time,
* show "-march=native" line in /proc/config.gz,
* bump Kconfig "shell" output buffer to accommodate the option,

* inject individual MARCH_NATIVE options at compile time,
	see other patches.

Currently only Intel and gcc are supported.

Intel, because I never had an AMD box.

Gcc, because clang emits detailed "march=native" information in
a different way, so I need to think how to extract it reliably.

Size benchmarks, my trimmed down kernel:

	add/remove: 1/11 grow/shrink: 1856/5598 up/down: 14452/-65830 (-51378)
	Function                                     old     new   delta
	sha_transform                               4302    4606    +304
				...
	udf_write_fi                                1907    1023    -884
	Total: Before=7814760, After=7763382, chg -0.66%

This is mostly due to memset() un-unrolling.

In general, say, crypto and hash code becomes bigger because all those
rotations and shifts become RORX and SHLX instructions and those are 5+
bytes. Older compilers may also emit "REP RET" on generic kernels
because AMD, but upon detecting Intel those REP prefixes may go.

Users are advised to enable it and do their own benchmarks to decide if
it is worth the hassle.

Signed-off-by: Alexey Dobriyan <adobriyan@...il.com>
---
 Makefile                     |  4 ++
 arch/x86/Kconfig.cpu         | 20 +++++++++
 arch/x86/Makefile            |  1 +
 scripts/kconfig/.gitignore   |  1 +
 scripts/kconfig/Makefile     |  7 ++-
 scripts/kconfig/cpuid.c      | 85 ++++++++++++++++++++++++++++++++++++
 scripts/kconfig/preprocess.c |  2 +-
 scripts/march-native.sh      | 66 ++++++++++++++++++++++++++++
 8 files changed, 184 insertions(+), 2 deletions(-)
 create mode 100644 scripts/kconfig/cpuid.c
 create mode 100755 scripts/march-native.sh

diff --git a/Makefile b/Makefile
index 9ae48eef7095..9b0cfca01997 100644
--- a/Makefile
+++ b/Makefile
@@ -603,6 +603,10 @@ ifeq ($(dot-config),1)
 include include/config/auto.conf
 endif
 
+ifdef CONFIG_MARCH_NATIVE
+KBUILD_CFLAGS += -march=native
+endif
+
 ifeq ($(KBUILD_EXTMOD),)
 # Objects we will link into vmlinux / subdirs we need to visit
 init-y		:= init/
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6adce15268bd..8b05816af329 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -287,8 +287,26 @@ config GENERIC_CPU
 	  Generic x86-64 CPU.
 	  Run equally well on all x86-64 CPUs.
 
+config MARCH_NATIVE
+	bool "-march=native"
+	depends on X86_64 && CC_IS_GCC
+	---help---
+	  Compile with -march=native.
+
+	  Optimise for the machine where compilation is done at. Resulting
+	  kernel and modules will not run reliably on a different machine
+	  unless exactly identical CPUs are used.
+
+	  Select only if you're self-compiling kernels and never share
+	  the binaries. If unsure, select "Generic x86_64".
+
 endchoice
 
+config MARCH_NATIVE_CC_FLAGS
+	string
+	depends on MARCH_NATIVE && CC_IS_GCC
+	default "$(shell,$(CC) -march=native -v -E -x c /dev/null 2>&1 | sed -ne '/^COLLECT_GCC_OPTIONS=/{n;p}' | awk '{$1=$1};1')"
+
 config X86_GENERIC
 	bool "Generic x86 support"
 	depends on X86_32
@@ -307,6 +325,7 @@ config X86_INTERNODE_CACHE_SHIFT
 	int
 	default "12" if X86_VSMP
 	default X86_L1_CACHE_SHIFT
+	depends on !MARCH_NATIVE
 
 config X86_L1_CACHE_SHIFT
 	int
@@ -314,6 +333,7 @@ config X86_L1_CACHE_SHIFT
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
+	depends on !MARCH_NATIVE
 
 config X86_F00F_BUG
 	def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 56e748a7679f..5d7355c88142 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -59,6 +59,7 @@ endif
 #
 KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
 KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+KBUILD_CFLAGS += $(call cc-option,-mgeneral-regs-only)
 
 ifeq ($(CONFIG_X86_32),y)
         BITS := 32
diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore
index b5bf92f66d11..411a885ad9b1 100644
--- a/scripts/kconfig/.gitignore
+++ b/scripts/kconfig/.gitignore
@@ -8,6 +8,7 @@
 # configuration programs
 #
 conf
+cpuid
 mconf
 nconf
 qconf
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 3f327e21f60e..26b3bcacb64a 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -65,8 +65,9 @@ simple-targets := oldconfig allnoconfig allyesconfig allmodconfig \
 	alldefconfig randconfig listnewconfig olddefconfig syncconfig
 PHONY += $(simple-targets)
 
-$(simple-targets): $(obj)/conf
+$(simple-targets): $(obj)/conf $(obj)/cpuid
 	$< $(silent) --$@ $(Kconfig)
+	$(Q)$(srctree)/scripts/march-native.sh $(CC) $(obj)/cpuid
 
 PHONY += savedefconfig defconfig
 
@@ -148,6 +149,10 @@ $(obj)/lexer.lex.o: $(obj)/parser.tab.h
 HOSTCFLAGS_lexer.lex.o	:= -I $(srctree)/$(src)
 HOSTCFLAGS_parser.tab.o	:= -I $(srctree)/$(src)
 
+# cpuid: -march=native, CONFIG_MARCH_NATIVE_* detection
+hostprogs-y	+= cpuid
+cpuid-objs	:= cpuid.o
+
 # conf: Used for defconfig, oldconfig and related targets
 hostprogs-y	+= conf
 conf-objs	:= conf.o $(common-objs)
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
new file mode 100644
index 000000000000..81b292382e26
--- /dev/null
+++ b/scripts/kconfig/cpuid.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, 2019 Alexey Dobriyan <adobriyan@...il.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef __x86_64__
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static inline bool streq(const char *s1, const char *s2)
+{
+	return strcmp(s1, s2) == 0;
+}
+
+static inline void cpuid(uint32_t eax0, uint32_t *eax, uint32_t *ecx, uint32_t *edx, uint32_t *ebx)
+{
+	asm volatile (
+		"cpuid"
+		: "=a" (*eax), "=c" (*ecx), "=d" (*edx), "=b" (*ebx)
+		: "a" (eax0)
+	);
+}
+
+static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t *ecx, uint32_t *edx, uint32_t *ebx)
+{
+	asm volatile (
+		"cpuid"
+		: "=a" (*eax), "=c" (*ecx), "=d" (*edx), "=b" (*ebx)
+		: "a" (eax0), "c" (ecx0)
+	);
+}
+
+static uint32_t eax0_max;
+
+static void intel(void)
+{
+	uint32_t eax, ecx, edx, ebx;
+
+	if (eax0_max >= 1) {
+		cpuid(1, &eax, &ecx, &edx, &ebx);
+//		printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	const char *opt = argv[1];
+	uint32_t eax, ecx, edx, ebx;
+
+	if (argc != 2)
+		return EXIT_FAILURE;
+
+	cpuid(0, &eax, &ecx, &edx, &ebx);
+//	printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+	eax0_max = eax;
+
+	if (ecx == 0x6c65746e && edx == 0x49656e69 && ebx == 0x756e6547) {
+		intel();
+	}
+
+#define _(x)	if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+#undef _
+
+	return EXIT_FAILURE;
+}
+#else
+#include <stdlib.h>
+int main(void)
+{
+	return EXIT_FAILURE;
+}
+#endif
diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c
index 592dfbfa9fb3..efe5e28bf814 100644
--- a/scripts/kconfig/preprocess.c
+++ b/scripts/kconfig/preprocess.c
@@ -140,7 +140,7 @@ static char *do_lineno(int argc, char *argv[])
 static char *do_shell(int argc, char *argv[])
 {
 	FILE *p;
-	char buf[256];
+	char buf[2048];
 	char *cmd;
 	size_t nread;
 	int i;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
new file mode 100755
index 000000000000..29a33c80b62b
--- /dev/null
+++ b/scripts/march-native.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# Copyright (c) 2017-2019 Alexey Dobriyan <adobriyan@...il.com>
+if test "$(uname -m)" != "x86_64"; then
+	exit 0
+fi
+
+CC="$1"
+CPUID="$2"
+CONFIG=".config"
+AUTOCONF1="include/config/auto.conf"
+AUTOCONF2="include/generated/autoconf.h"
+
+if ! grep -q -e '^CONFIG_MARCH_NATIVE=y$' "$CONFIG"; then
+	sed -i -e '/^CONFIG_MARCH_NATIVE/d' "$AUTOCONF1" "$AUTOCONF2" >/dev/null 2>&1
+	exit 0
+fi
+
+if ! "$CC" -march=native -x c -c -o /dev/null /dev/null >/dev/null 2>&1; then
+	echo >&2 "error: unsupported '-march=native' compiler option"
+	exit 1
+fi
+
+_option() {
+	echo "$1=$2"		>>"$AUTOCONF1"
+	echo "#define $1 $2"	>>"$AUTOCONF2"
+}
+
+option() {
+	echo "$1=y"		>>"$AUTOCONF1"
+	echo "#define $1 1"	>>"$AUTOCONF2"
+}
+
+if test ! -f "$CONFIG" -o ! -f "$AUTOCONF1" -o ! -f "$AUTOCONF2"; then
+	exit 0
+fi
+
+COLLECT_GCC_OPTIONS=$(
+	"$CC" -march=native -v -E -x c /dev/null 2>&1	|\
+	sed -ne '/^COLLECT_GCC_OPTIONS=/{n;p}'		|\
+	awk '{$1=$1};1'
+)
+echo "-march=native: $COLLECT_GCC_OPTIONS"
+
+for i in $COLLECT_GCC_OPTIONS; do
+	case $i in
+		*/cc1|-E|-quiet|-v|/dev/null|--param|-fstack-protector*)
+			;;
+
+		l1-cache-line-size=64)
+			_option "CONFIG_X86_L1_CACHE_SHIFT"		6
+			_option "CONFIG_X86_INTERNODE_CACHE_SHIFT"	6
+			;;
+
+		l1-cache-size=*);;
+		l2-cache-size=*);;
+
+		-march=*);;
+		-mtune=*);;
+
+		-m*);;
+		-mno-*);;
+
+		*)
+			echo >&2 "warning: unexpected -march=native option '$i'"
+	esac
+done
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ