lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 10 Dec 2020 12:03:40 +0300
From:   Alexander Antonov <alexander.antonov@...ux.intel.com>
To:     acme@...nel.org
Cc:     linux-kernel@...r.kernel.org, jolsa@...hat.com,
        alexander.shishkin@...ux.intel.com, mark.rutland@....com,
        namhyung@...nel.org, mingo@...hat.com, peterz@...radead.org,
        ak@...ux.intel.com, alexander.antonov@...ux.intel.com
Subject: [PATCH 5/5] perf stat: Enable --iiostat mode for x86 platforms

This functionality is based on recently introduced sysfs attributes
for Intel® Xeon® Scalable processor family (code name Skylake-SP):
Commit bb42b3d39781 ("perf/x86/intel/uncore: Expose an Uncore unit to
IIO PMON mapping")

Mode is intended to provide four I/O performance metrics in MB per each
IIO stack:
 - Inbound Read: I/O devices below IIO stack read from the host memory
 - Inbound Write: I/O devices below IIO stack write to the host memory
 - Outbound Read: CPU reads from I/O devices below IIO stack
 - Outbound Write: CPU writes to I/O devices below IIO stack

Each metric requiries only one IIO event which increments at every 4B
transfer in corresponding direction. The formulas to compute metrics
are generic:
    #EventCount * 4B / (1024 * 1024)

Signed-off-by: Alexander Antonov <alexander.antonov@...ux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |  31 +++
 tools/perf/arch/x86/util/Build         |   1 +
 tools/perf/arch/x86/util/iiostat.c     | 335 +++++++++++++++++++++++++
 3 files changed, 367 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 5d4a673d7621..2c066f7e0681 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -121,6 +121,37 @@ to activate system-wide monitoring. Default is to count on all CPUs.
 -A::
 --no-aggr::
 Do not aggregate counts across all monitored CPUs.
+--iiostat::
+Mode is intended to provide four I/O performance metrics per each IIO
+stack (PCIe root port):
+    --Inbound Read(MB)   - I/O devices below IIO stack read from the host memory, in MB
+    --Inbound Write(MB)  - I/O devices below IIO stack write to the host memory, in MB
+    --Outbound Read(MB)  - CPU reads from I/O devices below IIO stack, in MB
+    --Outbound Write(MB) - CPU writes to I/O devices below IIO stack, in MB
+
+Sample output:
+
+Show all IIO stacks on 2-S platform:
+  $ perf stat --iiostat=show
+    S0-uncore_iio_0<0000:00>
+    S1-uncore_iio_0<0000:80>
+    S0-uncore_iio_1<0000:17>
+    S1-uncore_iio_1<0000:85>
+    S0-uncore_iio_2<0000:3a>
+    S1-uncore_iio_2<0000:ae>
+    S0-uncore_iio_3<0000:5d>
+    S1-uncore_iio_3<0000:d7>
+
+Print metrics for requested IIO stacks, multiple comma-separated list supported.
+  $ perf stat --iiostat=0000:17 -- dd if=/dev/zero of=/dev/nvme0n1 bs=1M oflag=direct
+		357708+0 records in
+		357707+0 records out
+		375083606016 bytes (375 GB, 349 GiB) copied, 213.997 s, 1.8 GB/s
+
+   Performance counter stats for 'system wide':
+
+     port             Inbound Read(MB)    Inbound Write(MB)    Outbound Read(MB)   Outbound Write(MB)
+  0000:17               358559                   44                    0                   22
 
 -n::
 --null::
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 347c39b960eb..6fa275d3d897 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -6,6 +6,7 @@ perf-y += perf_regs.o
 perf-y += topdown.o
 perf-y += machine.o
 perf-y += event.o
+perf-y += iiostat.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/iiostat.c b/tools/perf/arch/x86/util/iiostat.c
index 70f93a96723f..44342a111746 100644
--- a/tools/perf/arch/x86/util/iiostat.c
+++ b/tools/perf/arch/x86/util/iiostat.c
@@ -27,6 +27,44 @@
 #include "util/counts.h"
 #include "path.h"
 
+#ifndef MAX_PATH
+#define MAX_PATH 1024
+#endif
+
+#define UNCORE_IIO_PMU_PATH	"devices/uncore_iio_%d"
+#define SYSFS_UNCORE_PMU_PATH	"%s/"UNCORE_IIO_PMU_PATH
+#define PLATFORM_MAPPING_PATH	UNCORE_IIO_PMU_PATH"/die%d"
+
+enum iiostat_mode_t {
+	IIOSTAT_NONE		= -1,
+	IIOSTAT_RUN		= 0,
+	IIOSTAT_SHOW		= 1
+};
+
+static enum iiostat_mode_t iiostat_mode = IIOSTAT_NONE;
+
+/*
+ * Each metric requiries only one IIO event which increments at every 4B transfer
+ * in corresponding direction. The formulas to compute metrics are generic:
+ *     #EventCount * 4B / (1024 * 1024)
+ */
+static const char * const iiostat_metrics[] = {
+	"Inbound Read(MB)",
+	"Inbound Write(MB)",
+	"Outbound Read(MB)",
+	"Outbound Write(MB)",
+};
+
+static inline int iiostat_metrics_count(void)
+{
+	return sizeof(iiostat_metrics) / sizeof(char *);
+}
+
+static const char *iiostat_metric_by_idx(int idx)
+{
+	return *(iiostat_metrics + idx % iiostat_metrics_count());
+}
+
 struct iio_root_port {
 	u32 domain;
 	u8 bus;
@@ -123,3 +161,300 @@ static int iio_root_ports_list_insert(struct iio_root_ports_list *list,
 	}
 	return 0;
 }
+
+static int uncore_pmu_iio_platform_mapping(u8 pmu_idx, struct iio_root_ports_list * const list)
+{
+	char *buf;
+	char path[MAX_PATH];
+	u32 domain;
+	u8 bus;
+	struct iio_root_port *rp;
+	size_t size;
+	int ret;
+
+	for (int die = 0; die < cpu__max_node(); die++) {
+		scnprintf(path, MAX_PATH, PLATFORM_MAPPING_PATH, pmu_idx, die);
+		if (sysfs__read_str(path, &buf, &size) < 0) {
+			if (pmu_idx)
+				goto out;
+			pr_err("Mode iiostat is not supported\n");
+			return -1;
+		}
+		ret = sscanf(buf, "%04x:%02hhx", &domain, &bus);
+		free(buf);
+		if (ret != 2) {
+			pr_err("Invalid mapping data: iio_%d; die%d\n", pmu_idx, die);
+			return -1;
+		}
+		rp = iio_root_port_new(domain, bus, die, pmu_idx);
+		if (!rp || iio_root_ports_list_insert(list, rp)) {
+			free(rp);
+			return -ENOMEM;
+		}
+	}
+out:
+	return 0;
+}
+
+static u8 iio_pmu_count(void)
+{
+	u8 pmu_idx = 0;
+	char path[MAX_PATH];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (sysfs) {
+		for (;; pmu_idx++) {
+			snprintf(path, sizeof(path), SYSFS_UNCORE_PMU_PATH,
+				 sysfs, pmu_idx);
+			if (access(path, F_OK) != 0)
+				break;
+		}
+	}
+	return pmu_idx;
+}
+
+static int iio_root_ports_scan(struct iio_root_ports_list **list)
+{
+	int ret = -ENOMEM;
+	struct iio_root_ports_list *tmp_list;
+	u8 pmu_count = iio_pmu_count();
+
+	if (!pmu_count) {
+		pr_err("Unsupported uncore pmu configuration\n");
+		return -1;
+	}
+
+	tmp_list = iio_root_ports_list_new();
+	if (!tmp_list)
+		goto err;
+
+	for (u8 pmu_idx = 0; pmu_idx < pmu_count; pmu_idx++) {
+		ret = uncore_pmu_iio_platform_mapping(pmu_idx, tmp_list);
+		if (ret)
+			break;
+	}
+err:
+	if (!ret)
+		*list = tmp_list;
+	else
+		iio_root_ports_list_free(tmp_list);
+
+	return ret;
+}
+
+static int iio_root_port_parse_str(u32 *domain, u8 *bus, char *str)
+{
+	int ret;
+	regex_t regex;
+	/*
+	 * Expected format domain:bus:
+	 * Valid domain range [0:ffff]
+	 * Valid bus range [0:ff]
+	 * Example: 0000:af, 0:3d, 01:7
+	 */
+	regcomp(&regex, "^([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})$", REG_EXTENDED);
+	ret = regexec(&regex, str, 0, NULL, 0);
+	if (ret || sscanf(str, "%08x:%02hhx", domain, bus) != 2)
+		pr_warning("Unrecognized root port format: %s\n"
+			   "Please use the following format:\n"
+			   "\t [domain]:[bus]\n"
+			   "\t for example: 0000:3d\n", str);
+
+	regfree(&regex);
+	return ret;
+}
+
+static int iio_root_ports_list_filter(struct iio_root_ports_list **list,
+				      const char *filter)
+{
+	char *tok, *tmp, *filter_copy = NULL;
+	struct iio_root_port *rp;
+	u32 domain;
+	u8 bus;
+	int ret = -ENOMEM;
+	struct iio_root_ports_list *tmp_list = iio_root_ports_list_new();
+
+	if (!tmp_list)
+		goto err;
+
+	filter_copy = strdup(filter);
+	if (!filter_copy)
+		goto err;
+
+	for (tok = strtok_r(filter_copy, ",", &tmp); tok; tok = strtok_r(NULL, ",", &tmp)) {
+		if (!iio_root_port_parse_str(&domain, &bus, tok)) {
+			rp = iio_root_port_find_by_notation(*list, domain, bus);
+			if (rp) {
+				(*list)->rps[rp->idx] = NULL;
+				ret = iio_root_ports_list_insert(tmp_list, rp);
+				if (ret) {
+					free(rp);
+					goto err;
+				}
+			} else if (!iio_root_port_find_by_notation(tmp_list, domain, bus))
+				pr_warning("Root port %04x:%02x were not found\n", domain, bus);
+		}
+	}
+
+	if (tmp_list->nr_entries == 0) {
+		pr_err("Requested root ports were not found\n");
+		ret = -EINVAL;
+	}
+err:
+	iio_root_ports_list_free(*list);
+	if (ret)
+		iio_root_ports_list_free(tmp_list);
+	else
+		*list = tmp_list;
+
+	free(filter_copy);
+	return ret;
+}
+
+static int iiostat_event_group(struct evlist *evl, struct iio_root_ports_list *list)
+{
+	int ret;
+	struct iio_root_port **rp;
+	const char *iiostat_cmd_template =
+		"{uncore_iio_%x/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\
+		uncore_iio_%x/event=0x83,umask=0x01,ch_mask=0xF,fc_mask=0x07/,\
+		uncore_iio_%x/event=0xc0,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\
+		uncore_iio_%x/event=0xc0,umask=0x01,ch_mask=0xF,fc_mask=0x07/}";
+	const int len_template = strlen(iiostat_cmd_template) + 1;
+	struct evsel *evsel = NULL;
+	int metrics_count = iiostat_metrics_count();
+	char *iiostat_cmd = calloc(len_template, 1);
+
+	if (!iiostat_cmd)
+		return -ENOMEM;
+
+	for (rp = list->rps; *rp; rp++) {
+		sprintf(iiostat_cmd, iiostat_cmd_template,
+			(*rp)->pmu_idx, (*rp)->pmu_idx, (*rp)->pmu_idx, (*rp)->pmu_idx);
+		ret = parse_events(evl, iiostat_cmd, NULL);
+		if (ret)
+			goto err;
+	}
+
+	evlist__for_each_entry(evl, evsel) {
+		evsel->perf_device = list->rps[evsel->idx / metrics_count];
+	}
+	list->nr_entries = 0;
+err:
+	iio_root_ports_list_free(list);
+	free(iiostat_cmd);
+	return ret;
+}
+
+int iiostat_parse(const struct option *opt, const char *str,
+		  int unset __maybe_unused)
+{
+	int ret;
+	struct iio_root_ports_list *list;
+	struct evlist *evl = *(struct evlist **)opt->value;
+	struct perf_stat_config *config = (struct perf_stat_config *)opt->data;
+
+	if (evl->core.nr_entries > 0) {
+		pr_err("Unsupported event configuration\n");
+		return -1;
+	}
+	config->metric_only = true;
+	config->aggr_mode = AGGR_IIO_STACK;
+	config->iiostat_run = true;
+	ret = iio_root_ports_scan(&list);
+	if (ret)
+		return ret;
+
+	if (!str) {
+		iiostat_mode = IIOSTAT_RUN;
+	} else if (!strcmp(str, "show")) {
+		iiostat_mode = IIOSTAT_SHOW;
+	} else {
+		iiostat_mode = IIOSTAT_RUN;
+		ret = iio_root_ports_list_filter(&list, str);
+		if (ret)
+			return ret;
+	}
+	return iiostat_event_group(evl, list);
+}
+
+void iiostat_prefix(struct perf_stat_config *config,
+		    struct evlist *evlist,
+		    char *prefix, struct timespec *ts)
+{
+	struct iio_root_port *rp = evlist->selected->perf_device;
+
+	if (rp) {
+		if (ts)
+			sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s",
+					ts->tv_sec, ts->tv_nsec,
+					config->csv_sep, rp->domain, rp->bus,
+					config->csv_sep);
+		else
+			sprintf(prefix, "%04x:%02x%s", rp->domain, rp->bus,
+				config->csv_sep);
+	}
+}
+
+void iiostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
+			  struct perf_stat_output_ctx *out)
+{
+	double iiostat_value = 0;
+	u64 prev_count_val = 0;
+	const char *iiostat_metric = iiostat_metric_by_idx(evsel->idx);
+	u8 die = ((struct iio_root_port *)evsel->perf_device)->die;
+	struct perf_counts_values *count = perf_counts(evsel->counts, die, 0);
+
+	if (evsel->prev_raw_counts && !out->force_header) {
+		struct perf_counts_values *prev_count = perf_counts(evsel->prev_raw_counts, die, 0);
+
+		prev_count_val = prev_count->val;
+		prev_count->val = count->val;
+	}
+	iiostat_value = (count->val - prev_count_val) / ((double) count->run / count->ena);
+	out->print_metric(config, out->ctx, NULL, "%8.0f", iiostat_metric,
+			  iiostat_value / (256 * 1024));
+}
+
+int iiostat_show_root_ports(struct evlist *evlist, struct perf_stat_config *config)
+{
+	struct evsel *evsel;
+	struct iio_root_port *rp = NULL;
+	bool is_show_mode = (iiostat_mode == IIOSTAT_SHOW);
+
+	if (config->aggr_mode != AGGR_IIO_STACK) {
+		pr_err("Unsupported event configuration\n");
+		return -1;
+	}
+
+	if (is_show_mode || verbose) {
+		evlist__for_each_entry(evlist, evsel) {
+			if (!evsel->perf_device) {
+				pr_err("Unsupported event configuration\n");
+				return -1;
+			}
+			if (rp != evsel->perf_device) {
+				rp = evsel->perf_device;
+				iio_root_port_show(config->output, rp);
+			}
+		}
+	}
+	/* Stop iiostat for show mode*/
+	config->iiostat_run = !is_show_mode;
+	if (is_show_mode)
+		iiostat_delete_root_ports(evlist);
+	return 0;
+}
+
+void iiostat_delete_root_ports(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	struct iio_root_port *rp = NULL;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (rp != evsel->perf_device) {
+			rp = evsel->perf_device;
+			free(evsel->perf_device);
+		}
+	}
+}
-- 
2.19.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ