lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:	Wed,  8 Dec 2010 01:01:59 +0900
From:	Hitoshi Mitake <mitake@....info.waseda.ac.jp>
To:	Ingo Molnar <mingo@...e.hu>
Cc:	linux-kernel@...r.kernel.org, mitake@....info.waseda.ac.jp,
	h.mitake@...il.com, Miao Xie <miaox@...fujitsu.com>,
	Ma Ling <ling.ma@...el.com>, Zhao Yakui <yakui.zhao@...el.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Paul Mackerras <paulus@...ba.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Andi Kleen <andi@...stfloor.org>
Subject: [PATCH] perf bench: Add options for specifying access alignment to "mem memcpy"

Hi Ingo,

The alignment of memory accesses can degrade the performance
of a simple memory copy. So this patch adds options to
specify the access alignment used when calling memcpy().

The current maximum alignment is 8 bytes; should this value
be configurable?

I'll test Miao Xie's patch with this option later.

Example of use:
| mitake@...1i:~/linux/.../tools/perf% ./perf bench mem memcpy -l 500MB -r x86-64-unrolled
| # Running mem/memcpy benchmark...
| # Copying 500MB Bytes ...
|
|      748.866217 MB/Sec
|        4.521793 GB/Sec (with prefault)
| mitake@...1i:~/linux/.../tools/perf% ./perf bench mem memcpy -l 500MB -r x86-64-unrolled -d 3
| # Running mem/memcpy benchmark...
| # Copying 500MB Bytes ...
|
|      769.653487 MB/Sec
|        3.518181 GB/Sec (with prefault)

In the latter case, access to the destination memory region is shifted by 3 bytes,
and performance degradation is observed in the prefaulted copy.

Signed-off-by: Hitoshi Mitake <mitake@....info.waseda.ac.jp>
Cc: Miao Xie <miaox@...fujitsu.com>
Cc: Ma Ling <ling.ma@...el.com>
Cc: Zhao Yakui <yakui.zhao@...el.com>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
Cc: Paul Mackerras <paulus@...ba.org>
Cc: Frederic Weisbecker <fweisbec@...il.com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Andi Kleen <andi@...stfloor.org>
---
 tools/perf/bench/mem-memcpy.c |   42 +++++++++++++++++++++++++++++-----------
 1 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db82021..ac88f52 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -19,6 +19,7 @@
 #include <string.h>
 #include <sys/time.h>
 #include <errno.h>
+#include <unistd.h>
 
 #define K 1024
 
@@ -28,6 +29,8 @@ static bool		use_clock;
 static int		clock_fd;
 static bool		only_prefault;
 static bool		no_prefault;
+static int		src_align;
+static int		dst_align;
 
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
@@ -41,6 +44,10 @@ static const struct option options[] = {
 		    "Show only the result with page faults before memcpy()"),
 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
 		    "Show only the result without page faults before memcpy()"),
+	OPT_INTEGER('s', "src-alignment", &src_align,
+		    "Alignment of source memory region (in byte)"),
+	OPT_INTEGER('d', "dst-alignment", &dst_align,
+		    "Alignment of destination memory region (in byte)"),
 	OPT_END()
 };
 
@@ -79,6 +86,9 @@ static struct perf_event_attr clock_attr = {
 	.config		= PERF_COUNT_HW_CPU_CYCLES
 };
 
+/* Should this alignment be configurable? */
+#define ALIGNMENT 8
+
 static void init_clock(void)
 {
 	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
@@ -108,27 +118,29 @@ static double timeval2double(struct timeval *ts)
 
 static void alloc_mem(void **dst, void **src, size_t length)
 {
-	*dst = zalloc(length);
-	if (!dst)
+	int ret;
+
+	ret = posix_memalign(dst, ALIGNMENT, length + ALIGNMENT - 1);
+	if (ret)
 		die("memory allocation failed - maybe length is too large?\n");
 
-	*src = zalloc(length);
-	if (!src)
+	ret = posix_memalign(src, ALIGNMENT, length + ALIGNMENT - 1);
+	if (ret)
 		die("memory allocation failed - maybe length is too large?\n");
 }
 
 static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 {
 	u64 clock_start = 0ULL, clock_end = 0ULL;
-	void *src = NULL, *dst = NULL;
+	char *src = NULL, *dst = NULL;
 
-	alloc_mem(&src, &dst, len);
+	alloc_mem((void **)&src, (void **)&dst, len);
 
 	if (prefault)
-		fn(dst, src, len);
+		fn(dst + dst_align, src + src_align, len);
 
 	clock_start = get_clock();
-	fn(dst, src, len);
+	fn(dst + dst_align, src + src_align, len);
 	clock_end = get_clock();
 
 	free(src);
@@ -139,15 +151,15 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 {
 	struct timeval tv_start, tv_end, tv_diff;
-	void *src = NULL, *dst = NULL;
+	char *src = NULL, *dst = NULL;
 
-	alloc_mem(&src, &dst, len);
+	alloc_mem((void **)&src, (void **)&dst, len);
 
 	if (prefault)
-		fn(dst, src, len);
+		fn(dst + dst_align, src + src_align, len);
 
 	BUG_ON(gettimeofday(&tv_start, NULL));
-	fn(dst, src, len);
+	fn(dst + dst_align, src + src_align, len);
 	BUG_ON(gettimeofday(&tv_end, NULL));
 
 	timersub(&tv_end, &tv_start, &tv_diff);
@@ -198,6 +210,12 @@ int bench_mem_memcpy(int argc, const char **argv,
 	if (only_prefault && no_prefault)
 		only_prefault = no_prefault = false;
 
+	if (ALIGNMENT <= src_align || ALIGNMENT <= dst_align) {
+		fprintf(stderr, "Alignment is too large,"
+			"it should be shorter than %d Byte\n", ALIGNMENT);
+		return 1;
+	}
+
 	for (i = 0; routines[i].name; i++) {
 		if (!strcmp(routines[i].name, routine))
 			break;
-- 
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ