lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <3884969f-804d-2f53-c648-e2b0bd85edff@codeweavers.com>
Date:   Mon, 26 Apr 2021 10:47:16 -0400
From:   Nicholas Fraser <nfraser@...eweavers.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Ian Rogers <irogers@...gle.com>,
        Changbin Du <changbin.du@...el.com>,
        Kan Liang <kan.liang@...ux.intel.com>,
        Tan Xiaojun <tanxiaojun@...wei.com>,
        Jin Yao <yao.jin@...ux.intel.com>, linux-kernel@...r.kernel.org
Cc:     Ulrich Czekalla <uczekalla@...eweavers.com>
Subject: [PATCH v2] perf data: Add JSON export

This adds a feature to export perf data to JSON.

The resolved symbols are exported into the JSON so that external tools
don't need to load the dsos themselves (or even have access to them at
all.) This makes it easy to load and analyze perf data with standalone
tools where direct perf or libbabeltrace integration is impractical.

The exporter uses a minimal inline JSON encoding without any external
dependencies. Currently it only outputs some headers and sample metadata
but it's easily extensible.

Use it like this:

perf data convert --to-json out.json

Signed-off-by: Nicholas Fraser <nfraser@...eweavers.com>
---

Notes:
    This is a follow-up to a patch sent a couple weeks ago. I realized I sent a
    fixed version incorrectly as a reply to a previous version of the same patch so
    I'm re-sending it properly.
    
    Here's an example of what the resulting JSON looks like:
    
    {
            "linux-perf-json-version": 1,
            "headers": {
                    "header-version": 1,
                    "captured-on": "2021-04-06T08:42:01Z",
                    "os-release": "5.11.11-arch1-1",
                    "arch": "x86_64",
                    "cpu-desc": "Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz",
                    ...
            },
            "samples": [
                    {
                            "timestamp": 3074717308597,
                            "pid": 8604,
                            "tid": 8604,
                            "comm": "sh",
                            "callchain": [
                                    {
                                            "ip": "0x7f1e0deb2d36",
                                            "symbol": "__strcmp_avx2",
                                            "dso": "libc-2.33.so"
                                    },
                                    {
                                            "ip": "0x7f1e0dd7f49f",
                                            "symbol": "__gconv_find_transform",
                                            "dso": "libc-2.33.so"
                                    },
                                    ...
                            ]
                    },
                    ...
            ]
    }
    
    The JSON is of course huge but it compresses well. It can be piped directly to
    a compressor, for example:
    
    perf data convert --to-json /dev/stdout | xz > out.json.xz

 tools/perf/Documentation/perf-data.txt |   5 +-
 tools/perf/builtin-data.c              |  26 +-
 tools/perf/util/Build                  |   1 +
 tools/perf/util/data-convert-bt.c      |   2 +-
 tools/perf/util/data-convert-bt.h      |  11 -
 tools/perf/util/data-convert-json.c    | 384 +++++++++++++++++++++++++
 tools/perf/util/data-convert.h         |  10 +
 7 files changed, 418 insertions(+), 21 deletions(-)
 delete mode 100644 tools/perf/util/data-convert-bt.h
 create mode 100644 tools/perf/util/data-convert-json.c

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index 726b9bc9e1a7..417bf17e265c 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -17,7 +17,7 @@ Data file related processing.
 COMMANDS
 --------
 convert::
-	Converts perf data file into another format (only CTF [1] format is support by now).
+	Converts perf data file into another format.
 	It's possible to set data-convert debug variable to get debug messages from conversion,
 	like:
 	  perf --debug data-convert data convert ...
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
 --to-ctf::
 	Triggers the CTF conversion, specify the path of CTF data directory.
 
+--to-json::
+	Triggers JSON conversion. Specify the JSON filename to output.
+
 --tod::
 	Convert time to wall clock time.
 
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index 8d23b8d6ee8e..15ca23675ef0 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -7,7 +7,6 @@
 #include "debug.h"
 #include <subcmd/parse-options.h>
 #include "data-convert.h"
-#include "data-convert-bt.h"
 
 typedef int (*data_cmd_fn_t)(int argc, const char **argv);
 
@@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = {
 
 static int cmd_data_convert(int argc, const char **argv)
 {
-	const char *to_ctf     = NULL;
+	const char *to_json = NULL;
+	const char *to_ctf = NULL;
 	struct perf_data_convert_opts opts = {
 		.force = false,
 		.all = false,
@@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
 	const struct option options[] = {
 		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
 		OPT_STRING('i', "input", &input_name, "file", "input file name"),
+		OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
 #ifdef HAVE_LIBBABELTRACE_SUPPORT
 		OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
 		OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
@@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
 		OPT_END()
 	};
 
-#ifndef HAVE_LIBBABELTRACE_SUPPORT
-	pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
-	return -1;
-#endif
-
 	argc = parse_options(argc, argv, options,
 			     data_convert_usage, 0);
 	if (argc) {
@@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
 		return -1;
 	}
 
+	if (to_json && to_ctf) {
+		pr_err("You cannot specify both --to-ctf and --to-json.\n");
+		return -1;
+	}
+	if (!to_json && !to_ctf) {
+		pr_err("You must specify one of --to-ctf or --to-json.\n");
+		return -1;
+	}
+
+	if (to_json)
+		return bt_convert__perf2json(input_name, to_json, &opts);
+
 	if (to_ctf) {
 #ifdef HAVE_LIBBABELTRACE_SUPPORT
 		return bt_convert__perf2ctf(input_name, to_ctf, &opts);
 #else
-		pr_err("The libbabeltrace support is not compiled in.\n");
+		pr_err("The libbabeltrace support is not compiled in. perf should be "
+		       "compiled with environment variables LIBBABELTRACE=1 and "
+		       "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
 		return -1;
 #endif
 	}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..de9ac182b25a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -163,6 +163,7 @@ perf-$(CONFIG_LIBUNWIND_X86)      += libunwind/x86_32.o
 perf-$(CONFIG_LIBUNWIND_AARCH64)  += libunwind/arm64.o
 
 perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-y += data-convert-json.o
 
 perf-y += scripting-engines/
 
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 27c5fef9ad54..803102207a8b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -21,7 +21,7 @@
 #include <babeltrace/ctf/events.h>
 #include <traceevent/event-parse.h>
 #include "asm/bug.h"
-#include "data-convert-bt.h"
+#include "data-convert.h"
 #include "session.h"
 #include "debug.h"
 #include "tool.h"
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
deleted file mode 100644
index 821674d63c4e..000000000000
--- a/tools/perf/util/data-convert-bt.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DATA_CONVERT_BT_H
-#define __DATA_CONVERT_BT_H
-#include "data-convert.h"
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
-
-int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
-			 struct perf_data_convert_opts *opts);
-
-#endif /* HAVE_LIBBABELTRACE_SUPPORT */
-#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
new file mode 100644
index 000000000000..b57c48d35503
--- /dev/null
+++ b/tools/perf/util/data-convert-json.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * JSON export.
+ *
+ * Copyright (C) 2021, CodeWeavers Inc. <nfraser@...eweavers.com>
+ */
+
+#include "data-convert.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "linux/compiler.h"
+#include "linux/err.h"
+#include "util/auxtrace.h"
+#include "util/debug.h"
+#include "util/dso.h"
+#include "util/event.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/header.h"
+#include "util/map.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/tool.h"
+
+struct convert_json {
+	struct perf_tool tool;
+	FILE *out;
+	bool first;
+	u64 events_count;
+};
+
+// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
+static void output_json_string(FILE *out, const char *s)
+{
+	fputc('"', out);
+	while (*s) {
+		switch (*s) {
+
+		// required escapes with special forms as per RFC 8259
+		case '"':  fputs("\\\"", out); break;
+		case '\\': fputs("\\\\", out); break;
+		case '\b': fputs("\\b", out);  break;
+		case '\f': fputs("\\f", out);  break;
+		case '\n': fputs("\\n", out);  break;
+		case '\r': fputs("\\r", out);  break;
+		case '\t': fputs("\\t", out);  break;
+
+		default:
+			// all other control characters must be escaped by hex code
+			if (*s <= 0x1f)
+				fprintf(out, "\\u%04x", *s);
+			else
+				fputc(*s, out);
+			break;
+		}
+
+		++s;
+	}
+	fputc('"', out);
+}
+
+// Outputs an optional comma, newline and indentation to delimit a new value
+// from the previous one in a JSON object or array.
+static void output_json_delimiters(FILE *out, bool comma, int depth)
+{
+	int i;
+
+	if (comma)
+		fputc(',', out);
+	fputc('\n', out);
+	for (i = 0; i < depth; ++i)
+		fputc('\t', out);
+}
+
+// Outputs a printf format string (with delimiter) as a JSON value.
+__printf(4, 5)
+static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
+{
+	va_list args;
+
+	output_json_delimiters(out, comma, depth);
+	va_start(args, format);
+	vfprintf(out,  format, args);
+	va_end(args);
+}
+
+// Outputs a JSON key-value pair where the value is a string.
+static void output_json_key_string(FILE *out, bool comma, int depth,
+		const char *key, const char *value)
+{
+	output_json_delimiters(out, comma, depth);
+	output_json_string(out, key);
+	fputs(": ", out);
+	output_json_string(out, value);
+}
+
+// Outputs a JSON key-value pair where the value is a printf format string.
+__(printf, 5, 6)
+static void output_json_key_format(FILE *out, bool comma, int depth,
+		const char *key, const char *format, ...)
+{
+	va_list args;
+
+	output_json_delimiters(out, comma, depth);
+	output_json_string(out, key);
+	fputs(": ", out);
+	va_start(args, format);
+	vfprintf(out,  format, args);
+	va_end(args);
+}
+
+static void output_sample_callchain_entry(struct perf_tool *tool,
+		u64 ip, struct addr_location *al)
+{
+	struct convert_json *c = container_of(tool, struct convert_json, tool);
+	FILE *out = c->out;
+
+	output_json_format(out, false, 4, "{");
+	output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
+
+	if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
+		fputc(',', out);
+		output_json_key_string(out, false, 5, "symbol", al->sym->name);
+
+		if (al->map && al->map->dso) {
+			const char *dso = al->map->dso->short_name;
+
+			if (dso && strlen(dso) > 0) {
+				fputc(',', out);
+				output_json_key_string(out, false, 5, "dso", dso);
+			}
+		}
+	}
+
+	output_json_format(out, false, 4, "}");
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct evsel *evsel __maybe_unused,
+				struct machine *machine)
+{
+	struct convert_json *c = container_of(tool, struct convert_json, tool);
+	FILE *out = c->out;
+	struct addr_location al, tal;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_err("Sample resolution failed!\n");
+		return -1;
+	}
+
+	++c->events_count;
+
+	if (c->first)
+		c->first = false;
+	else
+		fputc(',', out);
+	output_json_format(out, false, 2, "{");
+
+	output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
+	output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
+	output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);
+
+	if (al.thread->cpu >= 0)
+		output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);
+
+	output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
+
+	output_json_key_format(out, true, 3, "callchain", "[");
+	if (sample->callchain) {
+		unsigned int i;
+		bool ok;
+		bool first_callchain = true;
+
+		for (i = 0; i < sample->callchain->nr; ++i) {
+			u64 ip = sample->callchain->ips[i];
+
+			if (ip >= PERF_CONTEXT_MAX) {
+				switch (ip) {
+				case PERF_CONTEXT_HV:
+					cpumode = PERF_RECORD_MISC_HYPERVISOR;
+					break;
+				case PERF_CONTEXT_KERNEL:
+					cpumode = PERF_RECORD_MISC_KERNEL;
+					break;
+				case PERF_CONTEXT_USER:
+					cpumode = PERF_RECORD_MISC_USER;
+					break;
+				default:
+					pr_debug("invalid callchain context: %"
+							PRId64 "\n", (s64) ip);
+					break;
+				}
+				continue;
+			}
+
+			if (first_callchain)
+				first_callchain = false;
+			else
+				fputc(',', out);
+
+			ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
+			output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+		}
+	} else {
+		output_sample_callchain_entry(tool, sample->ip, &al);
+	}
+	output_json_format(out, false, 3, "]");
+
+	output_json_format(out, false, 2, "}");
+	return 0;
+}
+
+static void output_headers(struct perf_session *session, struct convert_json *c)
+{
+	struct stat st;
+	struct perf_header *header = &session->header;
+	int ret;
+	int fd = perf_data__fd(session->data);
+	int i;
+	FILE *out = c->out;
+
+	output_json_key_format(out, false, 2, "header-version", "%u", header->version);
+
+	ret = fstat(fd, &st);
+	if (ret >= 0) {
+		time_t stctime = st.st_mtime;
+		char buf[256];
+
+		strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
+		output_json_key_string(out, true, 2, "captured-on", buf);
+	} else {
+		pr_debug("Failed to get mtime of source file, not writing captured-on");
+	}
+
+	output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
+	output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
+	output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
+
+	output_json_key_string(out, true, 2, "hostname", header->env.hostname);
+	output_json_key_string(out, true, 2, "os-release", header->env.os_release);
+	output_json_key_string(out, true, 2, "arch", header->env.arch);
+
+	output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+	output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
+	output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
+	output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+
+	if (header->env.clock.enabled) {
+		output_json_key_format(out, true, 2, "clockid",
+				"%u", header->env.clock.clockid);
+		output_json_key_format(out, true, 2, "clock-time",
+				"%" PRIu64, header->env.clock.clockid_ns);
+		output_json_key_format(out, true, 2, "real-time",
+				"%" PRIu64, header->env.clock.tod_ns);
+	}
+
+	output_json_key_string(out, true, 2, "perf-version", header->env.version);
+
+	output_json_key_format(out, true, 2, "cmdline", "[");
+	for (i = 0; i < header->env.nr_cmdline; i++) {
+		output_json_delimiters(out, i != 0, 3);
+		output_json_string(c->out, header->env.cmdline_argv[i]);
+	}
+	output_json_format(out, false, 2, "]");
+}
+
+int bt_convert__perf2json(const char *input_name, const char *output_name,
+		struct perf_data_convert_opts *opts __maybe_unused)
+{
+	struct perf_session *session;
+	int fd;
+	int ret = -1;
+
+	struct convert_json c = {
+		.tool = {
+			.sample         = process_sample_event,
+			.mmap           = perf_event__process_mmap,
+			.mmap2          = perf_event__process_mmap2,
+			.comm           = perf_event__process_comm,
+			.namespaces     = perf_event__process_namespaces,
+			.cgroup         = perf_event__process_cgroup,
+			.exit           = perf_event__process_exit,
+			.fork           = perf_event__process_fork,
+			.lost           = perf_event__process_lost,
+			.tracing_data   = perf_event__process_tracing_data,
+			.build_id       = perf_event__process_build_id,
+			.id_index       = perf_event__process_id_index,
+			.auxtrace_info  = perf_event__process_auxtrace_info,
+			.auxtrace       = perf_event__process_auxtrace,
+			.event_update   = perf_event__process_event_update,
+			.ordered_events = true,
+			.ordering_requires_timestamps = true,
+		},
+		.first = true,
+		.events_count = 0,
+	};
+
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+		.path = input_name,
+		.force = opts->force,
+	};
+
+	if (opts->all) {
+		pr_err("--all is currently unsupported for JSON output.\n");
+		goto err;
+	}
+	if (opts->tod) {
+		pr_err("--tod is currently unsupported for JSON output.\n");
+		goto err;
+	}
+
+	fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
+	if (fd == -1) {
+		if (errno == EEXIST)
+			pr_err("Output file exists. Use --force to overwrite it.\n");
+		else
+			pr_err("Error opening output file!\n");
+		goto err;
+	}
+
+	c.out = fdopen(fd, "w");
+	if (!c.out) {
+		fprintf(stderr, "Error opening output file!\n");
+		close(fd);
+		goto err;
+	}
+
+	session = perf_session__new(&data, false, &c.tool);
+	if (IS_ERR(session)) {
+		fprintf(stderr, "Error creating perf session!\n");
+		goto err_fclose;
+	}
+
+	if (symbol__init(&session->header.env) < 0) {
+		fprintf(stderr, "Symbol init error!\n");
+		goto err_session_delete;
+	}
+
+	// The opening brace is printed manually because it isn't delimited from a
+	// previous value (i.e. we don't want a leading newline)
+	fputc('{', c.out);
+
+	// Version number for future-proofing. Most additions should be able to be
+	// done in a backwards-compatible way so this should only need to be bumped
+	// if some major breaking change must be made.
+	output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
+
+	// Output headers
+	output_json_format(c.out, true, 1, "\"headers\": {");
+	output_headers(session, &c);
+	output_json_format(c.out, false, 1, "}");
+
+	// Output samples
+	output_json_format(c.out, true, 1, "\"samples\": [");
+	perf_session__process_events(session);
+	output_json_format(c.out, false, 1, "]");
+	output_json_format(c.out, false, 0, "}");
+	fputc('\n', c.out);
+
+	fprintf(stderr,
+			"[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
+			data.path, output_name);
+
+	fprintf(stderr,
+			"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+			(ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
+
+	ret = 0;
+err_session_delete:
+	perf_session__delete(session);
+err_fclose:
+	fclose(c.out);
+err:
+	return ret;
+}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index feab5f114e37..1b4c5f598415 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -2,10 +2,20 @@
 #ifndef __DATA_CONVERT_H
 #define __DATA_CONVERT_H
 
+#include <stdbool.h>
+
 struct perf_data_convert_opts {
 	bool force;
 	bool all;
 	bool tod;
 };
 
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+
+int bt_convert__perf2json(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+
 #endif /* __DATA_CONVERT_H */
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ