[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <156426997428.6953.2280314984853805596.tglx@nanos.tec.linutronix.de>
Date: Sat, 27 Jul 2019 23:26:14 -0000
From: Thomas Gleixner <tglx@...utronix.de>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org, x86@...nel.org
Subject: [GIT pull] perf/urgent for 5.3-rc2
Linus,
please pull the latest perf-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus
up to: 289a2d22b5b6: perf/x86/intel: Mark expected switch fall-throughs
A pile of perf-related fixes:
Kernel:
- Fix SLOTS PEBS event constraints for Icelake CPUs
- Add the missing mask bit to allow counting hardware generated
prefetches on L3 for Icelake CPUs
- Make the test for hypervisor platforms more accurate (as far as possible)
- Correctly handle PMUs which override event->cpu
- Yet another missing fallthrough annotation
Tools:
perf.data:
- Fix loading of compressed data split across adjacent records
- Fix buffer size setting for processing CPU topology perf.data header.
perf stat:
- Fix segfault for event group in repeat mode
- Always separate "stalled cycles per insn" line, it was being appended to
the "instructions" line.
perf script:
- Fix --max-blocks man page description.
- Improve man page description of metrics.
- Fix off-by-one error in brstackinsn IPC computation.
perf probe:
- Avoid calling freeing routine multiple times for same pointer.
perf build:
- Do not use -Wshadow on gcc < 4.8, where its overly strict warnings are
treated as errors and break the build.
Thanks,
tglx
------------------>
Alexey Budankov (1):
perf session: Fix loading of compressed data split across adjacent records
Andi Kleen (3):
perf script: Fix --max-blocks man page description
perf script: Improve man page description of metrics
perf script: Fix off by one in brstackinsn IPC computation
Arnaldo Carvalho de Melo (3):
perf probe: Set pev->nargs to zero after freeing pev->args entries
perf probe: Avoid calling freeing routine multiple times for same pointer
perf build: Do not use -Wshadow on gcc < 4.8
Cong Wang (1):
perf stat: Always separate stalled cycles per insn
Gustavo A. R. Silva (1):
perf/x86/intel: Mark expected switch fall-throughs
Jiri Olsa (2):
perf tools: Fix proper buffer size for feature processing
perf stat: Fix segfault for event group in repeat mode
Kan Liang (1):
perf/x86/intel: Fix SLOTS PEBS event constraint
Leonard Crestez (1):
perf/core: Fix creating kernel counters for PMUs that override event->cpu
Yunying Sun (1):
perf/x86/intel: Fix invalid Bit 13 for Icelake MSR_OFFCORE_RSP_x register
Zhenzhong Duan (1):
perf/x86: Apply more accurate check on hypervisor platform
arch/x86/events/intel/core.c | 9 +++++----
arch/x86/events/intel/ds.c | 2 +-
kernel/events/core.c | 2 +-
tools/perf/Documentation/perf-script.txt | 8 ++++----
tools/perf/builtin-probe.c | 10 ++++++++++
tools/perf/builtin-script.c | 2 +-
tools/perf/builtin-stat.c | 9 ++++++++-
tools/perf/util/evsel.c | 2 ++
tools/perf/util/header.c | 2 +-
tools/perf/util/probe-event.c | 1 +
tools/perf/util/session.c | 22 ++++++++++++++--------
tools/perf/util/session.h | 1 +
tools/perf/util/stat-shadow.c | 3 ++-
tools/perf/util/zstd.c | 4 ++--
tools/scripts/Makefile.include | 9 ++++++++-
15 files changed, 61 insertions(+), 25 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9e911a96972b..648260b5f367 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -20,7 +20,6 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
-#include <asm/hypervisor.h>
#include "../perf_event.h"
@@ -263,8 +262,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
};
static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
@@ -4053,7 +4052,7 @@ static bool check_msr(unsigned long msr, u64 mask)
* Disable the check for real HW, so we don't
* mess with potentionaly enabled registers:
*/
- if (hypervisor_is_type(X86_HYPER_NATIVE))
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;
/*
@@ -4955,6 +4954,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
pmem = true;
+ /* fall through */
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
case INTEL_FAM6_KABYLAKE_MOBILE:
@@ -5004,6 +5004,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ICELAKE_XEON_D:
pmem = true;
+ /* fall through */
case INTEL_FAM6_ICELAKE_MOBILE:
case INTEL_FAM6_ICELAKE_DESKTOP:
x86_pmu.late_ack = true;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2c8db2c19328..f1269e804e9b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -851,7 +851,7 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..0463c1151bae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11274,7 +11274,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err_unlock;
}
- perf_install_in_context(ctx, event, cpu);
+ perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index d4e2e18a5881..caaab28f8400 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -228,11 +228,11 @@ OPTIONS
With the metric option perf script can compute metrics for
sampling periods, similar to perf stat. This requires
- specifying a group with multiple metrics with the :S option
+ specifying a group with multiple events defining metrics with the :S option
for perf record. perf will sample on the first event, and
- compute metrics for all the events in the group. Please note
+ print computed metrics for all the events in the group. Please note
that the metric computed is averaged over the whole sampling
- period, not just for the sample point.
+ period (since the last sample), not just for the sample point.
For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
@@ -384,7 +384,7 @@ include::itrace.txt[]
perf script --time 0%-10%,30%-40%
--max-blocks::
- Set the maximum number of program blocks to print with brstackasm for
+ Set the maximum number of program blocks to print with brstackinsn for
each sample.
--reltime::
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 6418782951a4..3d0ffd41fb55 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -698,6 +698,16 @@ __cmd_probe(int argc, const char **argv)
ret = perf_add_probe_events(params.events, params.nevents);
if (ret < 0) {
+
+ /*
+ * When perf_add_probe_events() fails it calls
+ * cleanup_perf_probe_events(pevs, npevs), i.e.
+ * cleanup_perf_probe_events(params.events, params.nevents), which
+ * will call clear_perf_probe_event(), so set nevents to zero
+ * to avoid cleanup_params() to call clear_perf_probe_event() again
+ * on the same pevs.
+ */
+ params.nevents = 0;
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 8f24865596af..0140ddb8dd0b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1059,7 +1059,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+ printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b55a534b4de0..352cf39d7c2f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -607,7 +607,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
* group leaders.
*/
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
- perf_evlist__close(evsel_list);
+
+ /*
+ * We need to keep evsel_list alive, because it's processed
+ * later the evsel_list will be closed after.
+ */
+ if (!STAT_RECORD)
+ perf_evlist__close(evsel_list);
return WEXITSTATUS(status);
}
@@ -1997,6 +2003,7 @@ int cmd_stat(int argc, const char **argv)
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
}
+ perf_evlist__close(evsel_list);
perf_session__delete(perf_stat.session);
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ebb46da4dfe5..52459dd5ad0c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1291,6 +1291,7 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
+ evsel->ids = 0;
}
static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
@@ -2077,6 +2078,7 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__close_fd(evsel);
perf_evsel__free_fd(evsel);
+ perf_evsel__free_id(evsel);
}
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c24db7f4909c..20111f8da5cb 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3747,7 +3747,7 @@ int perf_event__process_feature(struct perf_session *session,
return 0;
ff.buf = (void *)fe->data;
- ff.size = event->header.size - sizeof(event->header);
+ ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;
if (feat_ops[feat].process(&ff, NULL))
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index cd1eb73cfe83..8394d48f8b32 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2230,6 +2230,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
field = next;
}
}
+ pev->nargs = 0;
zfree(&pev->args);
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0fd6c614e68..37efa1f43d8b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -36,10 +36,16 @@ static int perf_session__process_compressed_event(struct perf_session *session,
void *src;
size_t decomp_size, src_size;
u64 decomp_last_rem = 0;
- size_t decomp_len = session->header.env.comp_mmap_len;
+ size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
struct decomp *decomp, *decomp_last = session->decomp_last;
- decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ decomp_len += decomp_last_rem;
+ }
+
+ mmap_len = sizeof(struct decomp) + decomp_len;
+ decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (decomp == MAP_FAILED) {
pr_err("Couldn't allocate memory for decompression\n");
@@ -47,10 +53,10 @@ static int perf_session__process_compressed_event(struct perf_session *session,
}
decomp->file_pos = file_offset;
+ decomp->mmap_len = mmap_len;
decomp->head = 0;
- if (decomp_last) {
- decomp_last_rem = decomp_last->size - decomp_last->head;
+ if (decomp_last_rem) {
memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
decomp->size = decomp_last_rem;
}
@@ -61,7 +67,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
if (!decomp_size) {
- munmap(decomp, sizeof(struct decomp) + decomp_len);
+ munmap(decomp, mmap_len);
pr_err("Couldn't decompress data\n");
return -1;
}
@@ -255,15 +261,15 @@ static void perf_session__delete_threads(struct perf_session *session)
static void perf_session__release_decomp_events(struct perf_session *session)
{
struct decomp *next, *decomp;
- size_t decomp_len;
+ size_t mmap_len;
next = session->decomp;
- decomp_len = session->header.env.comp_mmap_len;
do {
decomp = next;
if (decomp == NULL)
break;
next = decomp->next;
- munmap(decomp, decomp_len + sizeof(struct decomp));
+ mmap_len = decomp->mmap_len;
+ munmap(decomp, mmap_len);
} while (1);
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index dd8920b745bc..863dbad87849 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -46,6 +46,7 @@ struct perf_session {
struct decomp {
struct decomp *next;
u64 file_pos;
+ size_t mmap_len;
u64 head;
size_t size;
char data[];
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 656065af4971..accb1bf1cfd8 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -819,7 +819,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
} else if (have_frontend_stalled) {
- print_metric(config, ctxp, NULL, NULL,
+ out->new_line(config, ctxp);
+ print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index 23bdb9884576..d2202392ffdb 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -99,8 +99,8 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
- pr_err("failed to decompress (B): %ld -> %ld : %s\n",
- src_size, output.size, ZSTD_getErrorName(ret));
+ pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+ src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
output.dst = dst + output.pos;
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 495066bafbe3..ded7a950dc40 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
EXTRA_WARNINGS += -Wold-style-definition
EXTRA_WARNINGS += -Wpacked
EXTRA_WARNINGS += -Wredundant-decls
-EXTRA_WARNINGS += -Wshadow
EXTRA_WARNINGS += -Wstrict-prototypes
EXTRA_WARNINGS += -Wswitch-default
EXTRA_WARNINGS += -Wswitch-enum
@@ -69,8 +68,16 @@ endif
# will do for now and keep the above -Wstrict-aliasing=3 in place
# in newer systems.
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
+#
+# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
+# that takes into account Linus's comments (search for Wshadow) for the reasoning about
+# -Wshadow not being interesting before gcc 4.8.
+
ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
EXTRA_WARNINGS += -fno-strict-aliasing
+EXTRA_WARNINGS += -Wno-shadow
+else
+EXTRA_WARNINGS += -Wshadow
endif
ifneq ($(findstring $(MAKEFLAGS), w),w)
Powered by blists - more mailing lists