lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 4 Apr 2017 13:17:39 -0300
From:   Arnaldo Carvalho de Melo <acme@...nel.org>
To:     Andi Kleen <andi@...stfloor.org>
Cc:     Jiri Olsa <jolsa@...nel.org>, linux-kernel@...r.kernel.org,
        Andi Kleen <ak@...ux.intel.com>,
        Namhyung Kim <namhyung@...nel.org>,
        David Ahern <dsahern@...il.com>,
        Ingo Molnar <mingo@...nel.org>, Wang Nan <wangnan0@...wei.com>
Subject: Re: [PATCH] perf, tools, script: Skip locking overhead in perf script

Em Thu, Mar 23, 2017 at 05:28:07PM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@...ux.intel.com>
> 
> I was tired of seeing ~10% locking overhead in perf script while
> doing larger PT decodes. perf script doesn't need any locking

> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f53f449d864d..b404644f559a 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c

Huh, shouldn't this be builtin-script.c? Anyway, below is a more
complete patch, but I couldn't measure any improvement, at least with a
50M perf.data file containing intel_pt data... will try some more later.

- Arnaldo


commit 42ee7ffd3f88047839837d9b35eef23740a86429
Author: Arnaldo Carvalho de Melo <acme@...hat.com>
Date:   Tue Apr 4 13:15:04 2017 -0300

    WIP
    
    Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 46acc8ece41f..e6262d36f0de 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2493,6 +2493,8 @@ int cmd_script(int argc, const char **argv)
 		NULL
 	};
 
+	perf_set_singlethreaded();
+
 	setup_scripting();
 
 	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5c0ea11a8f0a..9319291414a5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -75,6 +75,7 @@ libperf-y += data.o
 libperf-y += tsc.o
 libperf-y += cloexec.o
 libperf-y += call-path.o
+libperf-y += rwsem.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c1870ac365a3..08dd01fd2b44 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -476,7 +476,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 static void maps__init(struct maps *maps)
 {
 	maps->entries = RB_ROOT;
-	pthread_rwlock_init(&maps->lock, NULL);
+	init_rwsem(&maps->lock);
 }
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -505,9 +505,9 @@ static void __maps__purge(struct maps *maps)
 
 static void maps__exit(struct maps *maps)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__purge(maps);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 void map_groups__exit(struct map_groups *mg)
@@ -574,7 +574,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 	struct symbol *sym;
 	struct rb_node *nd;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -590,7 +590,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 
 	sym = NULL;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return sym;
 }
 
@@ -626,7 +626,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 	size_t printed = 0;
 	struct rb_node *nd;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
@@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 		}
 	}
 
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 
 	return printed;
 }
@@ -670,7 +670,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 	struct rb_node *next;
 	int err = 0;
 
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 
 	root = &maps->entries;
 	next = rb_first(root);
@@ -738,7 +738,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 
 	err = 0;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 	return err;
 }
 
@@ -759,7 +759,7 @@ int map_groups__clone(struct thread *thread,
 	struct map *map;
 	struct maps *maps = &parent->maps[type];
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *new = map__clone(map);
@@ -776,7 +776,7 @@ int map_groups__clone(struct thread *thread,
 
 	err = 0;
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return err;
 }
 
@@ -803,9 +803,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
 
 void maps__insert(struct maps *maps, struct map *map)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__insert(maps, map);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 static void __maps__remove(struct maps *maps, struct map *map)
@@ -816,9 +816,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
 
 void maps__remove(struct maps *maps, struct map *map)
 {
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 	__maps__remove(maps, map);
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 struct map *maps__find(struct maps *maps, u64 ip)
@@ -826,7 +826,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
 	struct rb_node **p, *parent = NULL;
 	struct map *m;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	p = &maps->entries.rb_node;
 	while (*p != NULL) {
@@ -842,7 +842,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
 
 	m = NULL;
 out:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return m;
 }
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c8a5a644c0a9..fd76534a7807 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/types.h>
+#include "rwsem.h"
 
 enum map_type {
 	MAP__FUNCTION = 0,
@@ -61,7 +62,7 @@ struct kmap {
 
 struct maps {
 	struct rb_root	 entries;
-	pthread_rwlock_t lock;
+	struct rw_semaphore lock;
 };
 
 struct map_groups {
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000000..02b0f18b96eb
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,27 @@
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+	return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000000..c06afd42dc11
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,18 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+	pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 9b4d8ba22fed..d3caa5e5f845 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -210,7 +210,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	struct maps *maps = &mg->maps[type];
 	struct map *next, *curr;
 
-	pthread_rwlock_wrlock(&maps->lock);
+	down_write(&maps->lock);
 
 	curr = maps__first(maps);
 	if (curr == NULL)
@@ -228,7 +228,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	curr->end = ~0ULL;
 
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_write(&maps->lock);
 }
 
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1561,7 +1561,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	struct maps *maps = &mg->maps[type];
 	struct map *map;
 
-	pthread_rwlock_rdlock(&maps->lock);
+	down_read(&maps->lock);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1571,7 +1571,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	map = NULL;
 
 out_unlock:
-	pthread_rwlock_unlock(&maps->lock);
+	up_read(&maps->lock);
 	return map;
 }
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index dcdb87a5d0a1..4da6e776a189 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -259,7 +259,7 @@ static int __thread__prepare_access(struct thread *thread)
 		struct maps *maps = &thread->mg->maps[i];
 		struct map *map;
 
-		pthread_rwlock_rdlock(&maps->lock);
+		down_read(&maps->lock);
 
 		for (map = maps__first(maps); map; map = map__next(map)) {
 			err = unwind__prepare_access(thread, map, &initialized);
@@ -267,7 +267,7 @@ static int __thread__prepare_access(struct thread *thread)
 				break;
 		}
 
-		pthread_rwlock_unlock(&maps->lock);
+		up_read(&maps->lock);
 	}
 
 	return err;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d8b45cea54d0..a016880a55e6 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -20,6 +20,13 @@
 #include "callchain.h"
 #include "strlist.h"
 
+bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void)
+{
+	perf_singlethreaded = true;
+}
+
 #define CALLCHAIN_PARAM_DEFAULT			\
 	.mode		= CHAIN_GRAPH_ABS,	\
 	.min_percent	= 0.5,			\
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7cf5752b38fd..e59aa319a2e1 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -380,4 +380,7 @@ struct inline_node {
 struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr);
 void inline_node__delete(struct inline_node *node);
 
+extern bool perf_singlethreaded;
+void perf_set_singlethreaded(void);
+
 #endif /* GIT_COMPAT_UTIL_H */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ