lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1288368098-26121-2-git-send-email-mitake@dcl.info.waseda.ac.jp>
Date:	Sat, 30 Oct 2010 01:01:38 +0900
From:	Hitoshi Mitake <mitake@....info.waseda.ac.jp>
To:	Ingo Molnar <mingo@...e.hu>
Cc:	linux-kernel@...r.kernel.org, mitake@....info.waseda.ac.jp,
	h.mitake@...il.com, "Ma Ling:" <ling.ma@...el.com>,
	Zhao Yakui <yakui.zhao@...el.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Paul Mackerras <paulus@...ba.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [PATCH 2/2] perf bench: add x86-64 specific benchmarks to perf bench mem memcpy

This patch adds a new file, mem-memcpy-x86-64-asm.S,
for x86-64 specific memcpy() benchmarking.
The newly added benchmarks are:
 x86-64-rep:      memcpy() implemented with rep instruction
 x86-64-unrolled: unrolled memcpy()

The original idea of including kernel source files
for benchmarking was suggested by Ingo Molnar.
This is more effective than write-once programs for quantitative
evaluation of small in-kernel leaf functions that are called very frequently,
because perf bench lives in the kernel source tree and is easy to run
on various hardware, especially new CPU models.

This approach can also be used for other kernel functions, e.g. checksum functions.

Example of usage on Core i3 M330:

| % ./perf bench mem memcpy -l 500MB
| # Running mem/memcpy benchmark...
| # Copying 500MB Bytes from 0x7f911f94c010 to 0x7f913ed4d010 ...
|
|      578.732506 MB/Sec
| % ./perf bench mem memcpy -l 500MB -r x86-64-rep
| # Running mem/memcpy benchmark...
| # Copying 500MB Bytes from 0x7fb4b6fe4010 to 0x7fb4d63e5010 ...
|
|      738.184980 MB/Sec
| % ./perf bench mem memcpy -l 500MB -r x86-64-unrolled
| # Running mem/memcpy benchmark...
| # Copying 500MB Bytes from 0x7f6f2e668010 to 0x7f6f4da69010 ...
|
|      767.483269 MB/Sec

This clearly shows that the unrolled memcpy() is more efficient
than the rep version and glibc's one :)

# checkpatch.pl warns about the two externs in bench/mem-memcpy.c
# added by this patch, but I think that is not a problem.

Signed-off-by: Hitoshi Mitake <mitake@....info.waseda.ac.jp>
Cc: Ma Ling: <ling.ma@...el.com>
Cc: Zhao Yakui <yakui.zhao@...el.com>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
Cc: Paul Mackerras <paulus@...ba.org>
Cc: Frederic Weisbecker <fweisbec@...il.com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: H. Peter Anvin <hpa@...or.com>
---
 tools/perf/Makefile                      |    8 ++++++++
 tools/perf/bench/mem-memcpy-x86-64-asm.S |    4 ++++
 tools/perf/bench/mem-memcpy.c            |   14 ++++++++++++++
 3 files changed, 26 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/bench/mem-memcpy-x86-64-asm.S

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f6..540020e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -183,9 +183,12 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
 # Additional ARCH settings for x86
 ifeq ($(ARCH),i386)
         ARCH := x86
+	ARCH_CFLAGS = -DARCH_X86_64
 endif
 ifeq ($(ARCH),x86_64)
         ARCH := x86
+	ARCH_CFLAGS = -DARCH_X86_64
+	ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -417,6 +420,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +476,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(ARCH),x86)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -898,6 +905,7 @@ BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 0000000..6246d94
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,4 @@
+
+#define PERF_BENCH
+
+#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae74..ba73f39 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -19,6 +19,11 @@
 #include <sys/time.h>
 #include <errno.h>
 
+#ifdef ARCH_X86_64
+extern void *memcpy_x86_64_unrolled(void *to, const void *from, size_t len);
+extern void *memcpy_x86_64_rep(void *to, const void *from, size_t len);
+#endif
+
 #define K 1024
 
 static const char	*length_str	= "1MB";
@@ -47,6 +52,15 @@ struct routine routines[] = {
 	{ "default",
 	  "Default memcpy() provided by glibc",
 	  memcpy },
+#ifdef ARCH_X86_64
+	{ "x86-64-unrolled",
+	  "unrolled memcpy() in arch/x86/lib/memcpy_64.S",
+	  memcpy_x86_64_unrolled },
+	{ "x86-64-rep",
+	  "memcpy() implemented with rep instruction"
+	  " in arch/x86/lib/memcpy_64.S",
+	  memcpy_x86_64_rep },
+#endif
 	{ NULL,
 	  NULL,
 	  NULL   }
-- 
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ