lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Mon, 23 May 2016 11:40:07 -0300 From: Arnaldo Carvalho de Melo <acme@...nel.org> To: Andi Kleen <andi@...stfloor.org> Cc: peterz@...radead.org, jolsa@...nel.org, linux-kernel@...r.kernel.org, mingo@...nel.org, Andi Kleen <ak@...ux.intel.com> Subject: Re: [PATCH 7/8] perf stat: Basic support for TopDown in perf stat Em Thu, May 19, 2016 at 05:10:01PM -0700, Andi Kleen escreveu: > From: Andi Kleen <ak@...ux.intel.com> > > Add basic plumbing for TopDown in perf stat > > Add a new --topdown options to enable events. > When --topdown is specified set up events for all topdown > events supported by the kernel. > Add topdown-* as a special case to the event parser, as is > needed for all events containing -. > > The actual code to compute the metrics is in follow-on patches. > > v2: Use standard sysctl read function. > v3: Move x86 specific code to arch/ > v4: Enable --metric-only implicitly for topdown. > v5: Add --single-thread option to not force per core mode > v6: Fix output order of topdown metrics > v7: Allow combining with -d > v8: Remove --single-thread again > Signed-off-by: Andi Kleen <ak@...ux.intel.com> > --- > tools/perf/Documentation/perf-stat.txt | 16 +++++ > tools/perf/arch/x86/util/Build | 1 + > tools/perf/arch/x86/util/group.c | 27 ++++++++ > tools/perf/builtin-stat.c | 114 ++++++++++++++++++++++++++++++++- > tools/perf/util/group.h | 7 ++ > tools/perf/util/parse-events.l | 1 + > 6 files changed, 163 insertions(+), 3 deletions(-) > create mode 100644 tools/perf/arch/x86/util/group.c > create mode 100644 tools/perf/util/group.h > > diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt > index 04f23b404bbc..3aaa2916f604 100644 > --- a/tools/perf/Documentation/perf-stat.txt > +++ b/tools/perf/Documentation/perf-stat.txt > @@ -204,6 +204,22 @@ Aggregate counts per physical processor for system-wide mode measurements. > --no-aggr:: > Do not aggregate counts across all monitored CPUs. 
> > +--topdown:: > +Print top down level 1 metrics if supported by the CPU. This allows to > +determine bottle necks in the CPU pipeline for CPU bound workloads, > +by breaking it down into frontend bound, backend bound, bad speculation > +and retiring. Metrics are only printed when they cross a threshold. > + > +The top down metrics may be collected per core instead of per > +CPU thread. In this case per core mode is automatically enabled > +and -a (global monitoring) is needed, requiring root rights or > +perf.perf_event_paranoid=-1. > + > +This enables --metric-only, unless overriden with --no-metric-only. > + > +To interpret the results it is usually needed to know on which > +CPUs the workload runs on. If needed the CPUs can be forced using > +taskset. > > EXAMPLES > -------- > diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build > index 465970370f3e..4cd8a16b1b7b 100644 > --- a/tools/perf/arch/x86/util/Build > +++ b/tools/perf/arch/x86/util/Build > @@ -3,6 +3,7 @@ libperf-y += tsc.o > libperf-y += pmu.o > libperf-y += kvm-stat.o > libperf-y += perf_regs.o > +libperf-y += group.o > > libperf-$(CONFIG_DWARF) += dwarf-regs.o > libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o > diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c > new file mode 100644 > index 000000000000..f3039b5ce8b1 > --- /dev/null > +++ b/tools/perf/arch/x86/util/group.c > @@ -0,0 +1,27 @@ > +#include <stdio.h> > +#include "api/fs/fs.h" > +#include "util/group.h" > + > +/* > + * Check whether we can use a group for top down. > + * Without a group may get bad results due to multiplexing. > + */ > +bool check_group(bool *warn) Please rename this, "check_group" is way too generic, and things that are possibly renamed by arch code, of which there are plenty in the tree, usually come prefixed by "arch_" so that we know at a glance that this may be overridden by arch code. 
> +{ > + int n; > + > + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) > + return false; > + if (n > 0) { > + *warn = true; > + return false; > + } > + return true; > +} > + > +void group_warn(void) > +{ > + fprintf(stderr, > + "nmi_watchdog enabled with topdown. May give wrong results.\n" > + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); > +} > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c > index db84bfc0a478..7c5c50b61b28 100644 > --- a/tools/perf/builtin-stat.c > +++ b/tools/perf/builtin-stat.c > @@ -59,10 +59,13 @@ > #include "util/thread.h" > #include "util/thread_map.h" > #include "util/counts.h" > +#include "util/group.h" > #include "util/session.h" > #include "util/tool.h" > +#include "util/group.h" > #include "asm/bug.h" > > +#include <api/fs/fs.h> > #include <stdlib.h> > #include <sys/prctl.h> > #include <locale.h> > @@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = { > "}" > }; > > +static const char * topdown_attrs[] = { > + "topdown-total-slots", > + "topdown-slots-retired", > + "topdown-recovery-bubbles", > + "topdown-fetch-bubbles", > + "topdown-slots-issued", > + NULL, > +}; > + > static struct perf_evlist *evsel_list; > > static struct target target = { > @@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1; > static bool null_run = false; > static int detailed_run = 0; > static bool transaction_run; > +static bool topdown_run = false; > static bool big_num = true; > static int big_num_opt = -1; > static const char *csv_sep = NULL; > @@ -124,6 +137,7 @@ static unsigned int initial_delay = 0; > static unsigned int unit_width = 4; /* strlen("unit") */ > static bool forever = false; > static bool metric_only = false; > +static bool force_metric_only = false; > static struct timespec ref_time; > static struct cpu_map *aggr_map; > static aggr_get_id_t aggr_get_id; > @@ -1515,6 +1529,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, > return 0; > } > > +static int 
enable_metric_only(const struct option *opt __maybe_unused, > + const char *s __maybe_unused, int unset) > +{ > + force_metric_only = true; > + metric_only = !unset; > + return 0; > +} > + > static const struct option stat_options[] = { > OPT_BOOLEAN('T', "transaction", &transaction_run, > "hardware transaction statistics"), > @@ -1573,8 +1595,10 @@ static const struct option stat_options[] = { > "aggregate counts per thread", AGGR_THREAD), > OPT_UINTEGER('D', "delay", &initial_delay, > "ms to wait before starting measurement after program start"), > - OPT_BOOLEAN(0, "metric-only", &metric_only, > - "Only print computed metrics. No raw values"), > + OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, > + "Only print computed metrics. No raw values", enable_metric_only), > + OPT_BOOLEAN(0, "topdown", &topdown_run, > + "measure topdown level 1 statistics"), > OPT_END() > }; > > @@ -1767,12 +1791,61 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) > return 0; > } > > +static void filter_events(const char **attr, char **str, bool use_group) Is this really a generic function or is it something topdown specific? If the latter, please prefix it with "topdown_". 
> +{ > + int off = 0; > + int i; > + int len = 0; > + char *s; > + > + for (i = 0; attr[i]; i++) { > + if (pmu_have_event("cpu", attr[i])) { > + len += strlen(attr[i]) + 1; > + attr[i - off] = attr[i]; > + } else > + off++; > + } > + attr[i - off] = NULL; > + > + *str = malloc(len + 1 + 2); > + if (!*str) > + return; > + s = *str; > + if (i - off == 0) { > + *s = 0; > + return; > + } > + if (use_group) > + *s++ = '{'; > + for (i = 0; attr[i]; i++) { > + strcpy(s, attr[i]); > + s += strlen(s); > + *s++ = ','; > + } > + if (use_group) { > + s[-1] = '}'; > + *s = 0; > + } else > + s[-1] = 0; > +} > + > +__weak bool check_group(bool *warn) > +{ > + *warn = false; > + return false; > +} > + > +__weak void group_warn(void) > +{ > +} > + > /* > * Add default attributes, if there were no attributes specified or > * if -d/--detailed, -d -d or -d -d -d is used: > */ > static int add_default_attributes(void) > { > + int err; > struct perf_event_attr default_attrs0[] = { > > { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, > @@ -1891,7 +1964,6 @@ static int add_default_attributes(void) > return 0; > > if (transaction_run) { > - int err; > if (pmu_have_event("cpu", "cycles-ct") && > pmu_have_event("cpu", "el-start")) > err = parse_events(evsel_list, transaction_attrs, NULL); > @@ -1904,6 +1976,42 @@ static int add_default_attributes(void) > return 0; > } > > + if (topdown_run) { > + char *str = NULL; > + bool warn = false; > + > + if (stat_config.aggr_mode != AGGR_GLOBAL && > + stat_config.aggr_mode != AGGR_CORE) { > + pr_err("top down event configuration requires --per-core mode\n"); > + return -1; > + } > + stat_config.aggr_mode = AGGR_CORE; > + if (nr_cgroups || !target__has_cpu(&target)) { > + pr_err("top down event configuration requires system-wide mode (-a)\n"); > + return -1; > + } > + > + if (!force_metric_only) > + metric_only = true; > + filter_events(topdown_attrs, &str, check_group(&warn)); > + if (topdown_attrs[0] && str) { > + if (warn) > + 
group_warn(); > + err = parse_events(evsel_list, str, NULL); > + if (err) { > + fprintf(stderr, > + "Cannot set up top down events %s: %d\n", > + str, err); > + free(str); > + return -1; > + } > + } else { > + fprintf(stderr, "System does not support topdown\n"); > + return -1; > + } > + free(str); > + } > + > if (!evsel_list->nr_entries) { > if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) > return -1; > diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h > new file mode 100644 > index 000000000000..daad3ffdc68d > --- /dev/null > +++ b/tools/perf/util/group.h > @@ -0,0 +1,7 @@ > +#ifndef GROUP_H > +#define GROUP_H 1 > + > +bool check_group(bool *warn); > +void group_warn(void); > + > +#endif > diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l > index 1477fbc78993..744ebe3fa30f 100644 > --- a/tools/perf/util/parse-events.l > +++ b/tools/perf/util/parse-events.l > @@ -259,6 +259,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > +topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > > L1-dcache|l1-d|l1d|L1-data | > L1-icache|l1-i|l1i|L1-instruction | > -- > 2.5.5
Powered by blists - more mailing lists