lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 05 Jul 2011 09:35:34 +0800
From:	Lin Ming <ming.m.lin@...el.com>
To:	Andi Kleen <andi@...stfloor.org>
Cc:	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Ingo Molnar <mingo@...e.hu>,
	Stephane Eranian <eranian@...gle.com>,
	Arnaldo Carvalho de Melo <acme@...stprotocols.net>,
	linux-kernel <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 4/4] perf, tool: Add new command "perf mem"

On Tue, 2011-07-05 at 06:00 +0800, Andi Kleen wrote:
> > diff --git a/tools/perf/Makefile b/tools/perf/Makefile
> > index 032ba63..221d1d8 100644
> > --- a/tools/perf/Makefile
> > +++ b/tools/perf/Makefile
> > @@ -372,6 +372,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
> >  BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
> >  BUILTIN_OBJS += $(OUTPUT)builtin-test.o
> >  BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
> > +BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
> 
> File seems to be missing in the patch. Forgot a git add?
> 
> Also need a manpage for it.

Ah, sorry for the missing file. Here it is.

>From 6fb31b6fb63d73624c6bffbe81a013ca915da077 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@...el.com>
Date: Mon, 4 Jul 2011 07:33:36 +0000
Subject: [PATCH] perf, tool: Add new command "perf mem"

Adds new command "perf mem" to monitor memory load/store events.

$ perf mem

 usage: perf mem [<options>] {record <command> |report}

    -t, --type <type>     memory operations(load/store)
    -L, --latency <n>     latency to sample(only for load op)

Signed-off-by: Lin Ming <ming.m.lin@...el.com>
---
 tools/perf/Documentation/perf-mem.txt |   38 +++++
 tools/perf/Makefile                   |    1 +
 tools/perf/builtin-mem.c              |  269 +++++++++++++++++++++++++++++++++
 tools/perf/builtin-record.c           |    8 +
 tools/perf/builtin-script.c           |    6 +-
 tools/perf/builtin.h                  |    1 +
 tools/perf/perf.c                     |    1 +
 tools/perf/util/event.h               |    2 +
 tools/perf/util/evsel.c               |   10 ++
 tools/perf/util/parse-events.c        |   40 ++++-
 tools/perf/util/parse-events.h        |    2 +-
 11 files changed, 368 insertions(+), 10 deletions(-)
 create mode 100644 tools/perf/Documentation/perf-mem.txt
 create mode 100644 tools/perf/builtin-mem.c

diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
new file mode 100644
index 0000000..8ee5794
--- /dev/null
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -0,0 +1,38 @@
+perf-mem(1)
+===========
+
+NAME
+----
+perf-mem - Monitor memory load/store operations
+
+SYNOPSIS
+--------
+[verse]
+'perf mem' -t load [-L <n>] record <command>
+'perf mem' -t store record <command>
+'perf mem' -t load report
+'perf mem' -t store report
+
+DESCRIPTION
+-----------
+"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+from it, into perf.data.
+
+"perf mem -t <TYPE> report" displays the result.
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-t::
+--type=::
+	Select the memory operation type: load or store
+
+-L::
+--latency=::
+	Select the memory load latency to sample. Only used for memory load operations.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 032ba63..221d1d8 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -372,6 +372,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
 BUILTIN_OBJS += $(OUTPUT)builtin-test.o
 BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
 
 PERFLIBS = $(LIB_FILE)
 
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
new file mode 100644
index 0000000..d00fedf
--- /dev/null
+++ b/tools/perf/builtin-mem.c
@@ -0,0 +1,269 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+static char			const *input_name		= "perf.data";	/* file opened by report_events() */
+static const char		*mem_operation;	/* value of -t/--type; NULL until the option is given */
+static int			latency_value			= 3;	/* value of -L/--latency, formatted into the load event name */
+
+#define MEM_OPEARTION_LOAD	"load"	/* NOTE(review): "OPEARTION" is a typo; kept because other code compares against it */
+#define MEM_OPERATION_STORE	"store"	/* the visible code only tests for "load"; anything else is handled as a store */
+
+static const char * const mem_usage[] = {	/* NULL-terminated usage lines passed to usage_with_options() */
+	"perf mem [<options>] {record <command> |report}",
+	NULL
+};
+
+static const struct option mem_options[] = {	/* options parsed by cmd_mem() ahead of the sub-command */
+	OPT_STRING('t', "type", &mem_operation, "type", "memory operations(load/store)"),
+	OPT_INTEGER('L', "latency", &latency_value, "latency to sample(only for load op)"),
+	OPT_END()
+};
+
+/* Build "record -l -d -e <event> <command...>" and pass it to cmd_record(). */
+static int __cmd_record(int argc, const char **argv)
+{
+	int rec_argc = argc + 4, i = 0, j;	/* five fixed words replace argv[0] */
+	const char **rec_argv;
+	char event[20];
+
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv == NULL)
+		return -ENOMEM;
+	rec_argv[i++] = strdup("record");
+	rec_argv[i++] = strdup("-l");
+	rec_argv[i++] = strdup("-d");
+	rec_argv[i++] = strdup("-e");
+	if (!strcmp(mem_operation, MEM_OPEARTION_LOAD))	/* bounded: 8 hex digits fill event[] exactly */
+		snprintf(event, sizeof(event), "mem-load:%04x:p", latency_value);
+	else
+		snprintf(event, sizeof(event), "mem-store:p");
+	rec_argv[i++] = strdup(event);
+	for (j = 1; j < argc; j++, i++)	/* argv[0] is the "record" sub-command word */
+		rec_argv[i] = argv[j];
+	BUG_ON(i != rec_argc);
+	return cmd_record(i, rec_argv, NULL);
+}
+
+#define LEN 56	/* size of perf_mem_data.name */
+struct perf_mem_data {	/* one row of the load/store statistics tables */
+	char name[LEN];	/* label printed by the dump functions; an empty name marks an unused load slot */
+	u64 count;	/* number of samples accounted to this row */
+	u64 latency;	/* summed sample latency; only the load path updates it */
+};
+
+static struct perf_mem_data load_data[7][4][4] = {	/* indexed [i][j][k] as decoded by process_load_sample(); unnamed slots are never printed */
+ [MEM_LOAD_L1] = {
+	[MEM_LOAD_LOCAL >> 2] = {
+		[MEM_LOAD_MODIFIED >> 4] = {
+			"L1-local", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_L2] = {
+	[MEM_LOAD_SNOOP >> 2] = {
+		[MEM_LOAD_MODIFIED >> 4] = {
+			"L2-snoop", 0, 0
+		},
+	},
+	[MEM_LOAD_LOCAL >> 2] = {
+		[MEM_LOAD_MODIFIED >> 4] = {
+			"L2-local", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_L3] = {
+	[MEM_LOAD_SNOOP >> 2] = {
+		[MEM_LOAD_MODIFIED >> 4] = {
+			"L3-snoop, found M", 0, 0
+		},
+		[MEM_LOAD_SHARED >> 4] = {
+			"L3-snoop, found no M", 0, 0
+		},
+		[MEM_LOAD_INVALID >> 4] = {
+			"L3-snoop, no coherency actions", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_RAM] = {
+	[MEM_LOAD_SNOOP >> 2] = {
+		[MEM_LOAD_SHARED >> 4] = {
+			"L3-miss, snoop, shared", 0, 0
+		},
+	},
+	[MEM_LOAD_LOCAL >> 2] = {
+		[MEM_LOAD_EXCLUSIVE >> 4] = {
+			"L3-miss, local, exclusive", 0, 0
+		},
+		[MEM_LOAD_SHARED >> 4] = {
+			"L3-miss, local, shared", 0, 0
+		},
+	},
+	[MEM_LOAD_REMOTE >> 2] = {
+		[MEM_LOAD_EXCLUSIVE >> 4] = {
+			"L3-miss, remote, exclusive", 0, 0
+		},
+		[MEM_LOAD_SHARED >> 4] = {
+			"L3-miss, remote, shared", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_UNKNOWN + 4] = {	/* "+ 4" rows: process_load_sample() adds 4 to i when j is 0 (presumably MEM_LOAD_TOGGLE is 0 - confirm) */
+	[MEM_LOAD_TOGGLE] = {
+		[0] = {
+			"Unknown L3", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_IO + 4] = {
+	[MEM_LOAD_TOGGLE] = {
+		[0] = {
+			"IO", 0, 0
+		},
+	},
+ },
+ [MEM_LOAD_UNCACHED + 4] = {
+	[MEM_LOAD_TOGGLE] = {
+		[0] = {
+			"Uncached", 0, 0
+		},
+	},
+ },
+};	/* NOTE(review): 7 rows (0..6), yet the decoded row index can be as large as 3 + 4 = 7 */
+
+static struct perf_mem_data store_data[6] = {	/* three pairs; process_store_sample() bumps one row of each pair per sample */
+	{"data-cache hit", 0, 0},
+	{"data-cache miss", 0, 0},
+	{"STLB hit", 0, 0},
+	{"STLB miss", 0, 0},
+	{"Locked access", 0, 0},
+	{"Unlocked access", 0, 0},
+};
+
+/* Print total latency, sample count and average for every named load_data slot. */
+static void dump_load_data(void)
+{
+	const struct perf_mem_data *d;
+	size_t i, j, k;
+
+	printf("Memory load operation statistics\n");
+	printf("================================\n");
+	for (i = 0; i < sizeof(load_data) / sizeof(load_data[0]); i++)
+		for (j = 0; j < sizeof(load_data[0]) / sizeof(load_data[0][0]); j++)
+			for (k = 0; k < sizeof(load_data[0][0]) / sizeof(load_data[0][0][0]); k++) {
+				d = &load_data[i][j][k];
+				if (!d->name[0])	/* slot has no description in the table */
+					continue;
+				/* the fields are u64, so print them unsigned; a zero count yields avg 0 */
+				printf("%30s: total latency=%8" PRIu64 ", count=%8" PRIu64 "(avg=%" PRIu64 ")\n",
+					d->name, d->latency, d->count,
+					d->count ? d->latency / d->count : 0);
+			}
+}
+
+/* Print the sample count collected for every store_data category. */
+static void dump_store_data(void)
+{
+	size_t i;
+
+	printf("Memory store operation statistics\n");
+	printf("=================================\n");
+	for (i = 0; i < sizeof(store_data) / sizeof(store_data[0]); i++)	/* count is u64: print it unsigned */
+		printf("%30s: %8" PRIu64 "\n", store_data[i].name, store_data[i].count);
+}
+
+/* Account one load sample: pick the load_data slot from 'extra', add latency, bump count. */
+static void process_load_sample(u64 latency, u64 extra)
+{
+	unsigned int i = extra & 0x3;		/* bits 0-1 */
+	unsigned int j = (extra >> 2) & 0x3;	/* bits 2-3 */
+	unsigned int k = (extra >> 4) & 0x3;	/* bits 4-5 */
+
+	if (j == 0)
+		i += 4;		/* such samples are kept in the "+ 4" rows of the table */
+	if (i >= sizeof(load_data) / sizeof(load_data[0]))
+		return;		/* i may be 7 but only rows 0..6 exist: drop, never write out of bounds */
+	load_data[i][j][k].latency += latency;
+	load_data[i][j][k].count++;
+}
+
+/*
+ * Account one store sample. Three flags of 'extra' are tested independently;
+ * for each of them exactly one row of the matching store_data pair is bumped:
+ * rows 0/1 for MEM_STORE_DCU_HIT, rows 2/3 for MEM_STORE_STLB_HIT and
+ * rows 4/5 for MEM_STORE_LOCKED_ACCESS.
+ */
+static void process_store_sample(u64 extra)
+{
+	/* in every pair the first row counts "flag set", the second "flag clear" */
+	int dcu_row = (extra & MEM_STORE_DCU_HIT) ? 0 : 1;
+	int stlb_row = (extra & MEM_STORE_STLB_HIT) ? 2 : 3;
+	int lock_row = (extra & MEM_STORE_LOCKED_ACCESS) ? 4 : 5;
+
+	store_data[dcu_row].count++;
+	store_data[stlb_row].count++;
+	store_data[lock_row].count++;
+}
+
+/* Sample callback: account the sample as a load or a store, depending on -t. Always returns 0. */
+static int process_sample_event(union perf_event *event __unused, struct perf_sample *sample,
+                                struct perf_evsel *evsel __unused, struct perf_session *session __unused)
+{
+	if (strcmp(mem_operation, MEM_OPEARTION_LOAD) != 0)
+		process_store_sample(sample->extra);	/* anything but "load" counts as a store */
+	else
+		process_load_sample(sample->latency, sample->extra);
+	return 0;
+}
+
+static struct perf_event_ops event_ops = {	/* callbacks handed to the perf session in report_events() */
+	.sample			= process_sample_event,	/* the only handler defined in this file */
+	.mmap			= perf_event__process_mmap,
+	.comm			= perf_event__process_comm,
+	.lost			= perf_event__process_lost,
+	.fork			= perf_event__process_task,
+	.ordered_samples	= true,
+};
+
+/* Process the events stored in input_name and print the collected statistics. */
+static int report_events(void)
+{
+	int err = -1;
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+							 0, false, &event_ops);
+
+	if (session == NULL)
+		return -ENOMEM;
+	if (symbol__init() < 0)		/* err is still -1; release the session instead of leaking it */
+		goto out_delete;
+
+	err = perf_session__process_events(session, &event_ops);
+	/* print whatever was accumulated, even if processing reported an error */
+	if (!strcmp(mem_operation, MEM_OPEARTION_LOAD))
+		dump_load_data();
+	else
+		dump_store_data();
+out_delete:
+	perf_session__delete(session);
+	return err;
+}
+
+/* Entry point of "perf mem": parse -t/-L, then dispatch to the record or report sub-command. */
+int cmd_mem(int argc, const char **argv, const char *prefix __used)
+{
+	argc = parse_options(argc, argv, mem_options, mem_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	/* a sub-command and -t load|store are required; other types used to be treated as store */
+	if (!argc || !mem_operation || (strcmp(mem_operation, MEM_OPEARTION_LOAD) &&
+					strcmp(mem_operation, MEM_OPERATION_STORE)))
+		usage_with_options(mem_usage, mem_options);	/* presumably does not return - confirm */
+	if (!strncmp(argv[0], "rec", 3))	/* any word starting with "rec" selects record */
+		return __cmd_record(argc, argv);
+	if (!strncmp(argv[0], "rep", 3))	/* any word starting with "rep" selects report */
+		return report_events();
+	usage_with_options(mem_usage, mem_options);
+	return 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8e2c857..8ebdcdd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -63,6 +63,7 @@ static bool			inherit_stat			=  false;
 static bool			no_samples			=  false;
 static bool			sample_address			=  false;
 static bool			sample_time			=  false;
+static bool			latency_data			=  false;
 static bool			no_buildid			=  false;
 static bool			no_buildid_cache		=  false;
 static struct perf_evlist	*evsel_list;
@@ -199,6 +200,11 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
 		attr->mmap_data = track;
 	}
 
+	if (latency_data) {
+		attr->sample_type	|= PERF_SAMPLE_LATENCY;
+		attr->sample_type	|= PERF_SAMPLE_EXTRA;
+	}
+
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
@@ -780,6 +786,8 @@ const struct option record_options[] = {
 	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
 	OPT_BOOLEAN('n', "no-samples", &no_samples,
 		    "don't sample"),
+	OPT_BOOLEAN('l', "latency", &latency_data,
+		    "Latency data"),
 	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
 		    "do not update the buildid cache"),
 	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3056b45..c7489a6 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -140,7 +140,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,
 		return 0;
 
 	if (output[type].user_set) {
-		evname = __event_name(attr->type, attr->config);
+		evname = __event_name(attr->type, attr->config, attr->config1);
 		pr_err("Samples for '%s' event do not have %s attribute set. "
 		       "Cannot print '%s' field.\n",
 		       evname, sample_msg, output_field2str(field));
@@ -149,7 +149,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,
 
 	/* user did not ask for it explicitly so remove from the default list */
 	output[type].fields &= ~field;
-	evname = __event_name(attr->type, attr->config);
+	evname = __event_name(attr->type, attr->config, attr->config1);
 	pr_debug("Samples for '%s' event do not have %s attribute set. "
 		 "Skipping '%s' field.\n",
 		 evname, sample_msg, output_field2str(field));
@@ -292,7 +292,7 @@ static void print_sample_start(struct perf_sample *sample,
 			if (event)
 				evname = event->name;
 		} else
-			evname = __event_name(attr->type, attr->config);
+			evname = __event_name(attr->type, attr->config, 0);
 
 		printf("%s: ", evname ? evname : "(unknown)");
 	}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 4702e24..419ba8f 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -36,5 +36,6 @@ extern int cmd_lock(int argc, const char **argv, const char *prefix);
 extern int cmd_kvm(int argc, const char **argv, const char *prefix);
 extern int cmd_test(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);
+extern int cmd_mem(int argc, const char **argv, const char *prefix);
 
 #endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index ec635b7..20c53f8 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -332,6 +332,7 @@ static void handle_internal_command(int argc, const char **argv)
 		{ "kvm",	cmd_kvm,	0 },
 		{ "test",	cmd_test,	0 },
 		{ "inject",	cmd_inject,	0 },
+		{ "mem",	cmd_mem,	0 },
 	};
 	unsigned int i;
 	static const char ext[] = STRIP_EXTENSION;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1d7f664..1392867 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -76,6 +76,8 @@ struct perf_sample {
 	u64 id;
 	u64 stream_id;
 	u64 period;
+	u64 latency;
+	u64 extra;
 	u32 cpu;
 	u32 raw_size;
 	void *raw_data;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a03a36b..8eab351 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -405,6 +405,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
 		array++;
 	}
 
+	if (type & PERF_SAMPLE_LATENCY) {
+		data->latency = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_EXTRA) {
+		data->extra = *array;
+		array++;
+	}
+
 	if (type & PERF_SAMPLE_READ) {
 		fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
 		return -1;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 41982c3..9f3bcb9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,8 @@ static struct event_symbol event_symbols[] = {
   { CHW(BRANCH_INSTRUCTIONS),		"branch-instructions",		"branches"		},
   { CHW(BRANCH_MISSES),			"branch-misses",		""			},
   { CHW(BUS_CYCLES),			"bus-cycles",			""			},
+  { CHW(MEM_LOAD),			"mem-load",			""			},
+  { CHW(MEM_STORE),			"mem-store",			""			},
 
   { CSW(CPU_CLOCK),			"cpu-clock",			""			},
   { CSW(TASK_CLOCK),			"task-clock",			""			},
@@ -297,15 +299,18 @@ const char *event_name(struct perf_evsel *evsel)
 	if (evsel->name)
 		return evsel->name;
 
-	return __event_name(type, config);
+	return __event_name(type, config, evsel->attr.config1);
 }
 
-const char *__event_name(int type, u64 config)
+const char *__event_name(int type, u64 config, u64 extra)
 {
 	static char buf[32];
+	int n;
 
 	if (type == PERF_TYPE_RAW) {
-		sprintf(buf, "raw 0x%" PRIx64, config);
+		n = sprintf(buf, "raw 0x%" PRIx64, config);
+		if (extra)
+			sprintf(buf + n, ":%#" PRIx64, extra);
 		return buf;
 	}
 
@@ -668,6 +673,7 @@ static enum event_result
 parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
+	u64 config;
 	unsigned int i;
 	int n;
 
@@ -676,7 +682,18 @@ parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
 		if (n > 0) {
 			attr->type = event_symbols[i].type;
 			attr->config = event_symbols[i].config;
-			*strp = str + n;
+			str += n;
+			*strp = str;
+
+			if (*str++ == ':') {
+				n = hex2u64(str + 1, &config);
+				if (n > 0) {
+					attr->config1 = config;
+					str += n + 1;
+					*strp = str;
+				}
+			}
+
 			return EVT_HANDLED;
 		}
 	}
@@ -694,9 +711,20 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
 		return EVT_FAILED;
 	n = hex2u64(str + 1, &config);
 	if (n > 0) {
-		*strp = str + n + 1;
+		str += n + 1;
+		*strp = str;
 		attr->type = PERF_TYPE_RAW;
 		attr->config = config;
+
+		if (*str++ == ':') {
+			n = hex2u64(str + 1, &config);
+			if (n > 0) {
+				attr->config1 = config;
+				str += n + 1;
+				*strp = str;
+			}
+		}
+
 		return EVT_HANDLED;
 	}
 	return EVT_FAILED;
@@ -1078,7 +1106,7 @@ void print_events(const char *event_glob)
 
 	printf("\n");
 	printf("  %-50s [%s]\n",
-		"rNNN (see 'perf list --help' on how to encode it)",
+		"rNNN[:EEE] (see 'perf list --help' on how to encode it)",
 	       event_type_descriptors[PERF_TYPE_RAW]);
 	printf("\n");
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 746d3fc..904c8c4 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -22,7 +22,7 @@ extern bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
 const char *event_name(struct perf_evsel *event);
-extern const char *__event_name(int type, u64 config);
+extern const char *__event_name(int type, u64 config, u64 extra);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
-- 
1.7.5.1



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ