lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 01 Feb 2010 02:17:35 -0600
From:	Tom Zanussi <tzanussi@...il.com>
To:	Keiichi KII <k-keiichi@...jp.nec.com>
Cc:	linux-kernel@...r.kernel.org, lwoodman@...hat.com,
	linux-mm@...ck.org, mingo@...e.hu, riel@...hat.com,
	rostedt@...dmis.org, akpm@...ux-foundation.org, fweisbec@...il.com,
	Munehiro Ikeda <m-ikeda@...jp.nec.com>,
	Atsushi Tsuji <a-tsuji@...jp.nec.com>
Subject: Re: [RFC PATCH -tip 2/2 v2] add a scripts for pagecache usage per
 process

Hi,

On Mon, 2010-01-25 at 17:16 -0500, Keiichi KII wrote:
> (2010年01月23日 03:21), Tom Zanussi wrote:
> > Hi,
> > 
> > On Fri, 2010-01-22 at 19:08 -0500, Keiichi KII wrote:
> >> The scripts are implemented based on the trace stream scripting support.
> >> And the scripts implement the following.
> >>  - how many pagecaches each process has per each file
> >>  - how many pages are cached per each file
> >>  - how many pagecaches each process shares
> >>
> > 
> > Nice, it looks like a very useful script - I gave it a quick try and it
> > seems to work well...
> > 
> > The only problem I see, nothing to do with your script and nothing you
> > can do anything about at the moment, is that the record step generates a
> > huge amount of data, which of course makes the event processing take
> > awhile.  A lot of it appears to be due to perf itself - being able to
> > filter out the perf-generated events in the kernel would make a big
> > difference, I think; you normally don't want to see those anyway...
> 
> Yes, right. I don't want to process the data of perf itself.
> I will try to find any way to solve this problem. 
> 

Here's one way, using the tracepoint filters - it does make a big
difference in this case.

Before (using the new -P option, which includes perf in the trace
data):  

root@...picana:~# perf record -c 1 -f -a -M -R -e filemap:add_to_page_cache -e filemap:find_get_page -e filemap:remove_from_page_cache -P sleep 5
[ perf record: Woken up 0 times to write data ]
[ perf record: Captured and wrote 71.201 MB perf.data (~3110815 samples) ]

After (filters out events generated by perf):

root@...picana:~# perf record -c 1 -f -a -M -R -e filemap:add_to_page_cache -e filemap:find_get_page -e filemap:remove_from_page_cache sleep 5
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.309 MB perf.data (~13479 samples) ]

Tom

[PATCH] perf record: filter out perf process tracepoint events

The perf process itself can generate a lot of trace data, which most
of the time isn't of any interest.  This patch adds a predicate to the
kernel tracepoint filter of each recorded event type which effectively
screens out any event generated by perf.

Assuming the common case is to ignore perf, this patch makes excluding
perf's own events the default; the old behavior can be selected by
using 'perf record -P'.

Signed-off-by: Tom Zanussi <tzanussi@...il.com>
---
 tools/perf/builtin-record.c    |   44 ++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/parse-events.h |    3 +-
 2 files changed, 46 insertions(+), 1 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index eea5691..5fa113a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,8 @@ static int			no_samples			=      0;
 static int			sample_address			=      0;
 static int			multiplex			=      0;
 static int			multiplex_fd			=     -1;
+static int			include_perf			=      0;
+static char			exclude_perf_pred[MAX_FILTER_STR_VAL];
 
 static long			samples				=      0;
 static struct timeval		last_read;
@@ -236,6 +238,37 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
 	return h_attr;
 }
 
+static char *add_exclude_perf_pred(char *old_filter)
+{
+	int len = strlen(exclude_perf_pred);
+	char *filter;
+
+	if (old_filter != NULL)
+		len = len + strlen(" && ()") + strlen(old_filter);
+
+	if (len >= MAX_FILTER_STR_VAL) {
+		fprintf(stderr, "excluding perf exceeds max filter length,"
+			" use -P instead\n");
+		exit(-1);
+	}
+
+	filter = malloc(len + 1);
+	if (!filter) {
+		fprintf(stderr, "not enough memory to hold filter string\n");
+		exit(-1);
+	}
+
+	strcpy(filter, exclude_perf_pred);
+
+	if (old_filter) {
+		strcat(filter, " && (");
+		strcat(filter, old_filter);
+		strcat(filter, ")");
+	}
+
+	return filter;
+}
+
 static void create_counter(int counter, int cpu, pid_t pid)
 {
 	char *filter = filters[counter];
@@ -375,6 +408,12 @@ try_again:
 		}
 	}
 
+	if (include_perf == 0 && cpu == 0) {
+		filters[counter] = add_exclude_perf_pred(filter);
+		free(filter);
+		filter = filters[counter];
+	}
+
 	if (filter != NULL) {
 		ret = ioctl(fd[nr_cpu][counter],
 			    PERF_EVENT_IOC_SET_FILTER, filter);
@@ -677,6 +716,8 @@ static const struct option options[] = {
 		    "don't sample"),
 	OPT_BOOLEAN('M', "multiplex", &multiplex,
 		    "multiplex counter output in a single channel"),
+	OPT_BOOLEAN('P', "include-perf", &include_perf,
+		    "include perf in trace data (normally excluded)"),
 	OPT_END()
 };
 
@@ -716,5 +757,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 		attrs[counter].sample_period = default_interval;
 	}
 
+	if (include_perf == 0)
+		sprintf(exclude_perf_pred, "common_pid != %d", getpid());
+
 	return __cmd_record(argc, argv);
 }
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index b8c1f64..d43d9b0 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -25,7 +25,8 @@ extern const char *__event_name(int type, u64 config);
 extern int parse_events(const struct option *opt, const char *str, int unset);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
-#define EVENTS_HELP_MAX (128*1024)
+#define EVENTS_HELP_MAX		(128*1024)
+#define MAX_FILTER_STR_VAL	256
 
 extern void print_events(void);
 
-- 
1.6.4.GIT



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ