Message-ID: <20160324081026.GA7182@gmail.com>
Date:	Thu, 24 Mar 2016 09:10:26 +0100
From:	Ingo Molnar <mingo@...nel.org>
To:	Linus Torvalds <torvalds@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Thomas Gleixner <tglx@...utronix.de>,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Jiri Olsa <jolsa@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: [GIT PULL] perf fixes and updates

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 05f5ece76a88a2cd4859bc93f90379733dd8b4a3 Merge tag 'perf-core-for-mingo-20160323' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

[ Note: this pull request depends on the previous x86/urgent pull request I just
        sent - if you have not pulled x86/urgent then please disregard this one. ]

This tree contains various perf fixes on the kernel side, plus three late 
hw/event-enablement additions:

 - Intel Memory Bandwidth Monitoring events and handling
 - the AMD Accumulated Power Mechanism reporting facility
 - more IOMMU events

... and a final round of perf tooling updates/fixes.

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      perf list: Fix documentation of :ppp

Arnaldo Carvalho de Melo (18):
      tools: Move utilities.mak from perf to tools/scripts/
      perf tools: Remove misplaced __maybe_unused
      perf tests: Forward the perf_sample in the dwarf unwind test
      perf tools: Add cpumode to struct perf_sample
      perf machine: Rename perf_event__preprocess_sample to machine__resolve
      perf thread: Rename perf_event__preprocess_sample_addr to thread__resolve
      perf script: Remove lots of unused arguments
      perf tools: Remove unused DIE_IF macro
      perf tools: Simplify die() mechanism
      perf tools: Remove needless 'extern' from function prototypes
      tools include: Copy linux/stringify.h from the kernel
      perf tools: Do not include stringify.h from the kernel sources
      perf tools: Remove unused perf_pathdup, xstrdup functions
      perf help: Use asprintf instead of adhoc equivalents
      perf probe: No need to use formatting strbuf method
      perf tools: Unexport some methods unused outside strbuf.c
      perf llvm: Use realpath to canonicalize paths
      perf llvm: Use strerror_r instead of the thread unsafe strerror one

Huang Rui (3):
      perf/x86/amd: Move nodes_per_socket into bsp_init_amd()
      x86/cpufeature, perf/x86: Add AMD Accumulated Power Mechanism feature flag
      perf/x86/amd/power: Add AMD accumulated power reporting mechanism

Jakub Jelen (1):
      perf bench numa: Fix assertion for nodes bitfield

Kan Liang (1):
      perf/x86/intel/uncore: Remove ev_sel_ext bit support for PCU

Peter Zijlstra (9):
      perf/core: Fix the unthrottle logic
      perf/x86/ibs: Fix IBS throttle
      perf/x86/ibs: Fix race with IBS_STARTING state
      perf/x86/ibs: Add IBS interrupt to the dynamic throttle
      perf/core: Fix dynamic interrupt throttle
      perf/x86/BTS: Fix RCU usage
      perf/core: Fix Undefined behaviour in rb_alloc()
      perf/x86/cqm: Factor out some common code
      perf/core: Document some hotplug bits

Srinivas Pandruvada (1):
      perf/x86/intel/rapl: Add missing Broadwell models

Steven Rostedt (1):
      tools lib traceevent: Remove redundant CPU output

Sukadev Bhattiprolu (1):
      perf test: Remove 'core_id' check in topo test

Suravee Suthikulpanit (1):
      perf/x86/amd: Add support for new IOMMU performance events

Tony Luck (1):
      perf/x86/mbm: Add memory bandwidth monitoring event management

Vikas Shivappa (5):
      perf/x86/cqm: Fix CQM handling of grouping events into a cache_group
      perf/x86/cqm: Fix CQM memory leak and notifier leak
      perf/x86/mbm: Add Intel Memory B/W Monitoring enumeration and init
      perf/x86/mbm: Implement RMID recycling
      perf/x86/mbm: Add support for MBM counter overflow handling

Wang Nan (1):
      perf symbols: Record text offset in dso to calculate objdump address


 arch/x86/Kconfig                             |   9 +
 arch/x86/events/Makefile                     |   1 +
 arch/x86/events/amd/ibs.c                    |  37 ++-
 arch/x86/events/amd/iommu.c                  |   5 +
 arch/x86/events/amd/power.c                  | 353 +++++++++++++++++++++
 arch/x86/events/core.c                       |   4 +-
 arch/x86/events/intel/cqm.c                  | 454 +++++++++++++++++++++++++--
 arch/x86/events/intel/ds.c                   |   5 +-
 arch/x86/events/intel/rapl.c                 |   2 +
 arch/x86/events/intel/uncore_snbep.c         |   7 +-
 arch/x86/include/asm/cpufeatures.h           |   4 +-
 arch/x86/kernel/cpu/amd.c                    |  18 +-
 arch/x86/kernel/cpu/common.c                 |   4 +-
 include/linux/perf_event.h                   |   5 +
 kernel/events/core.c                         | 114 ++++---
 kernel/events/ring_buffer.c                  |   6 +-
 tools/include/linux/stringify.h              |  12 +
 tools/lib/api/Makefile                       |   2 +-
 tools/lib/subcmd/Makefile                    |   2 +-
 tools/lib/traceevent/event-parse.c           |   4 +-
 tools/perf/Documentation/Makefile            |   2 +-
 tools/perf/Documentation/perf-list.txt       |   6 +-
 tools/perf/Makefile.perf                     |   2 +-
 tools/perf/arch/powerpc/util/header.c        |   4 +-
 tools/perf/bench/bench.h                     |  22 +-
 tools/perf/bench/mem-memcpy-arch.h           |   2 +-
 tools/perf/bench/mem-memset-arch.h           |   2 +-
 tools/perf/bench/numa.c                      |   2 +-
 tools/perf/builtin-annotate.c                |   2 +-
 tools/perf/builtin-diff.c                    |   2 +-
 tools/perf/builtin-help.c                    |  69 ++--
 tools/perf/builtin-inject.c                  |   8 +-
 tools/perf/builtin-mem.c                     |   2 +-
 tools/perf/builtin-report.c                  |   3 +-
 tools/perf/builtin-script.c                  |  46 ++-
 tools/perf/builtin-timechart.c               |   2 +-
 tools/perf/builtin-top.c                     |   8 +-
 tools/perf/builtin-trace.c                   |   9 +-
 tools/perf/builtin.h                         |  64 ++--
 tools/perf/config/Makefile                   |   2 +-
 tools/perf/tests/code-reading.c              |   5 +-
 tools/perf/tests/dwarf-unwind.c              |   4 +-
 tools/perf/tests/hists_common.c              |   6 +-
 tools/perf/tests/hists_cumulate.c            |   9 +-
 tools/perf/tests/hists_filter.c              |   9 +-
 tools/perf/tests/hists_link.c                |  20 +-
 tools/perf/tests/hists_output.c              |   9 +-
 tools/perf/ui/gtk/hists.c                    |   2 +-
 tools/perf/util/Build                        |   1 -
 tools/perf/util/abspath.c                    |  37 ---
 tools/perf/util/annotate.h                   |   2 +-
 tools/perf/util/auxtrace.h                   |   2 +-
 tools/perf/util/build-id.c                   |   3 +-
 tools/perf/util/cache.h                      |  24 +-
 tools/perf/util/callchain.h                  |   4 +-
 tools/perf/util/cgroup.h                     |   4 +-
 tools/perf/util/cloexec.h                    |   2 +-
 tools/perf/util/data-convert-bt.c            |   2 +-
 tools/perf/util/db-export.c                  |   2 +-
 tools/perf/util/dso.h                        |   3 +-
 tools/perf/util/dwarf-aux.c                  |  10 +-
 tools/perf/util/dwarf-aux.h                  |  72 +++--
 tools/perf/util/event.c                      |  23 +-
 tools/perf/util/event.h                      |  13 +-
 tools/perf/util/evsel.c                      |   1 +
 tools/perf/util/genelf.h                     |   8 +-
 tools/perf/util/header.c                     |   5 -
 tools/perf/util/header.h                     |   2 +-
 tools/perf/util/hist.c                       |   2 +-
 tools/perf/util/hist.h                       |   3 +-
 tools/perf/util/intel-bts.c                  |   2 +-
 tools/perf/util/jit.h                        |  12 +-
 tools/perf/util/llvm-utils.c                 |  24 +-
 tools/perf/util/llvm-utils.h                 |   7 +-
 tools/perf/util/machine.c                    |  14 +-
 tools/perf/util/machine.h                    |   2 +-
 tools/perf/util/parse-events.h               |  21 +-
 tools/perf/util/path.c                       |  30 --
 tools/perf/util/probe-event.c                |   2 +-
 tools/perf/util/probe-event.h                |  57 ++--
 tools/perf/util/probe-finder.c               |   8 +-
 tools/perf/util/probe-finder.h               |  24 +-
 tools/perf/util/quote.h                      |   2 +-
 tools/perf/util/session.c                    |   5 +-
 tools/perf/util/sort.c                       |   2 +-
 tools/perf/util/stat-shadow.c                |  18 +-
 tools/perf/util/strbuf.c                     |   9 +-
 tools/perf/util/strbuf.h                     |  21 +-
 tools/perf/util/svghelper.h                  |  51 ++-
 tools/perf/util/symbol-elf.c                 |  12 +-
 tools/perf/util/symbol.h                     |   4 +-
 tools/perf/util/usage.c                      |   8 +-
 tools/perf/util/util.h                       |  27 +-
 tools/perf/util/wrapper.c                    |  12 -
 tools/{perf/config => scripts}/utilities.mak |   0
 95 files changed, 1336 insertions(+), 634 deletions(-)
 create mode 100644 arch/x86/events/amd/power.c
 create mode 100644 tools/include/linux/stringify.h
 delete mode 100644 tools/perf/util/abspath.c
 rename tools/{perf/config => scripts}/utilities.mak (100%)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8f2e6659281b..a313c0e7e165 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1206,6 +1206,15 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
+config PERF_EVENTS_AMD_POWER
+	depends on PERF_EVENTS && CPU_SUP_AMD
+	tristate "AMD Processor Power Reporting Mechanism"
+	---help---
+	  Provide power reporting mechanism support for AMD processors.
+	  Currently, it leverages the X86_FEATURE_ACC_POWER
+	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+	  average power consumption on Family 15h processors.
+
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index fdfea1511cc0..f59618a39990 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,6 +1,7 @@
 obj-y			+= core.o
 
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= amd/power.o
 obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 51087c29b2c2..3ea25c3917c0 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -376,7 +376,13 @@ static void perf_ibs_start(struct perf_event *event, int flags)
 	hwc->state = 0;
 
 	perf_ibs_set_period(perf_ibs, hwc, &period);
+	/*
+	 * Set STARTED before enabling the hardware, such that
+	 * a subsequent NMI must observe it. Then clear STOPPING
+	 * such that we don't consume NMIs by accident.
+	 */
 	set_bit(IBS_STARTED, pcpu->state);
+	clear_bit(IBS_STOPPING, pcpu->state);
 	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
 	perf_event_update_userpage(event);
@@ -390,7 +396,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 	u64 config;
 	int stopping;
 
-	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+	stopping = test_bit(IBS_STARTED, pcpu->state);
 
 	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
 		return;
@@ -398,8 +404,24 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 	rdmsrl(hwc->config_base, config);
 
 	if (stopping) {
+		/*
+		 * Set STOPPING before disabling the hardware, such that it
+		 * must be visible to NMIs the moment we clear the EN bit,
+		 * at which point we can generate an !VALID sample which
+		 * we need to consume.
+		 */
 		set_bit(IBS_STOPPING, pcpu->state);
 		perf_ibs_disable_event(perf_ibs, hwc, config);
+		/*
+		 * Clear STARTED after disabling the hardware; if it were
+		 * cleared before, an NMI hitting after the clear but before
+		 * the EN bit is cleared might be mistaken for a spurious NMI
+		 * and not be handled.
+		 *
+		 * Clearing it after, however, creates the problem of the NMI
+		 * handler seeing STARTED but not having a valid sample.
+		 */
+		clear_bit(IBS_STARTED, pcpu->state);
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 		hwc->state |= PERF_HES_STOPPED;
 	}
@@ -527,20 +549,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	u64 *buf, *config, period;
 
 	if (!test_bit(IBS_STARTED, pcpu->state)) {
+fail:
 		/*
 		 * Catch spurious interrupts after stopping IBS: After
 		 * disabling IBS there could still be incoming NMIs
 		 * with samples that even have the valid bit cleared.
 		 * Mark all these NMIs as handled.
 		 */
-		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+		if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+			return 1;
+
+		return 0;
 	}
 
 	msr = hwc->config_base;
 	buf = ibs_data.regs;
 	rdmsrl(msr, *buf);
 	if (!(*buf++ & perf_ibs->valid_mask))
-		return 0;
+		goto fail;
 
 	config = &ibs_data.regs[0];
 	perf_ibs_event_update(perf_ibs, event, config);
@@ -599,7 +625,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	throttle = perf_event_overflow(event, &data, &regs);
 out:
 	if (throttle)
-		perf_ibs_disable_event(perf_ibs, hwc, *config);
+		perf_ibs_stop(event, 0);
 	else
 		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
@@ -611,6 +637,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 static int
 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	u64 stamp = sched_clock();
 	int handled = 0;
 
 	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
@@ -619,6 +646,8 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	if (handled)
 		inc_irq_stat(apic_perf_irqs);
 
+	perf_sample_event_took(sched_clock() - stamp);
+
 	return handled;
 }
 NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 635e5eba0caf..40625ca7a190 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -118,6 +118,11 @@ static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
 	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
 	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
 	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,    "csource=0x14"),
+	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,     "csource=0x15"),
+	AMD_IOMMU_EVENT_DESC(vapic_int_guest,         "csource=0x16"),
+	AMD_IOMMU_EVENT_DESC(smi_recv,                "csource=0x17"),
+	AMD_IOMMU_EVENT_DESC(smi_blk,                 "csource=0x18"),
 	{ /* end: all zeroes */ },
 };
 
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
new file mode 100644
index 000000000000..55a3529dbf12
--- /dev/null
+++ b/arch/x86/events/amd/power.c
@@ -0,0 +1,353 @@
+/*
+ * Performance events - AMD Processor Power Reporting Mechanism
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Huang Rui <ray.huang@....com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR     0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
+#define MSR_F15H_PTSC			0xc0010280
+
+/* Event code: LSB 8 bits, passed in attr->config; any other bit is reserved. */
+#define AMD_POWER_EVENT_MASK		0xFFULL
+
+/*
+ * Accumulated power status counters.
+ */
+#define AMD_POWER_EVENTSEL_PKG		1
+
+/*
+ * The ratio of compute unit power accumulator sample period to the
+ * PTSC period.
+ */
+static unsigned int cpu_pwr_sample_ratio;
+
+/* Maximum accumulated power of a compute unit. */
+static u64 max_cu_acc_power;
+
+static struct pmu pmu_class;
+
+/*
+ * Accumulated power represents the sum of each compute unit's (CU) power
+ * consumption. On any core of each CU we read the total accumulated power from
+ * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents the CPU bitmap of all cores
+ * which are picked to measure the power for the CUs they belong to.
+ */
+static cpumask_t cpu_mask;
+
+static void event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
+	u64 delta, tdelta;
+
+	prev_pwr_acc = hwc->pwr_acc;
+	prev_ptsc = hwc->ptsc;
+	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
+	rdmsrl(MSR_F15H_PTSC, new_ptsc);
+
+	/*
+	 * Calculate the CU power consumption over a time period, the unit of
+	 * final value (delta) is micro-Watts. Then add it to the event count.
+	 */
+	if (new_pwr_acc < prev_pwr_acc) {
+		delta = max_cu_acc_power + new_pwr_acc;
+		delta -= prev_pwr_acc;
+	} else
+		delta = new_pwr_acc - prev_pwr_acc;
+
+	delta *= cpu_pwr_sample_ratio * 1000;
+	tdelta = new_ptsc - prev_ptsc;
+
+	do_div(delta, tdelta);
+	local64_add(delta, &event->count);
+}
+
+static void __pmu_event_start(struct perf_event *event)
+{
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	event->hw.state = 0;
+
+	rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
+	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
+}
+
+static void pmu_event_start(struct perf_event *event, int mode)
+{
+	__pmu_event_start(event);
+}
+
+static void pmu_event_stop(struct perf_event *event, int mode)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Mark event as deactivated and stopped. */
+	if (!(hwc->state & PERF_HES_STOPPED))
+		hwc->state |= PERF_HES_STOPPED;
+
+	/* Check if software counter update is necessary. */
+	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of an event
+		 * that we are disabling:
+		 */
+		event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int pmu_event_add(struct perf_event *event, int mode)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_START)
+		__pmu_event_start(event);
+
+	return 0;
+}
+
+static void pmu_event_del(struct perf_event *event, int flags)
+{
+	pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int pmu_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;
+
+	/* Only look at AMD power events. */
+	if (event->attr.type != pmu_class.type)
+		return -ENOENT;
+
+	/* Unsupported modes and filters. */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    /* no sampling */
+	    event->attr.sample_period)
+		return -EINVAL;
+
+	if (cfg != AMD_POWER_EVENTSEL_PKG)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void pmu_event_read(struct perf_event *event)
+{
+	event_update(event);
+}
+
+static ssize_t
+get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);
+
+static struct attribute *pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+	.attrs = pmu_attrs,
+};
+
+/*
+ * Currently it only supports reporting the power of each
+ * processor/package.
+ */
+EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");
+
+EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");
+
+/* Convert the count from micro-Watts to milli-Watts. */
+EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");
+
+static struct attribute *events_attr[] = {
+	EVENT_PTR(power_pkg),
+	EVENT_PTR(power_pkg_unit),
+	EVENT_PTR(power_pkg_scale),
+	NULL,
+};
+
+static struct attribute_group pmu_events_group = {
+	.name	= "events",
+	.attrs	= events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_format_group = {
+	.name	= "format",
+	.attrs	= formats_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&pmu_attr_group,
+	&pmu_format_group,
+	&pmu_events_group,
+	NULL,
+};
+
+static struct pmu pmu_class = {
+	.attr_groups	= attr_groups,
+	/* system-wide only */
+	.task_ctx_nr	= perf_invalid_context,
+	.event_init	= pmu_event_init,
+	.add		= pmu_event_add,
+	.del		= pmu_event_del,
+	.start		= pmu_event_start,
+	.stop		= pmu_event_stop,
+	.read		= pmu_event_read,
+};
+
+static void power_cpu_exit(int cpu)
+{
+	int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
+		return;
+
+	/*
+	 * Find another online CPU on the same compute unit, if there is
+	 * one; set it in cpu_mask and migrate the events and context
+	 * to it.
+	 */
+	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+	if (target < nr_cpumask_bits) {
+		cpumask_set_cpu(target, &cpu_mask);
+		perf_pmu_migrate_context(&pmu_class, cpu, target);
+	}
+}
+
+static void power_cpu_init(int cpu)
+{
+	int target;
+
+	/*
+	 * 1) If any CPU of the same compute unit is already set in
+	 * cpu_mask, do nothing.
+	 * 2) If no CPU of the same compute unit is set in cpu_mask,
+	 * set the current (STARTING) CPU.
+	 *
+	 * Note: if there is a CPU aside from the new one already in the
+	 * sibling mask, then it is also in cpu_mask.
+	 */
+	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+	if (target >= nr_cpumask_bits)
+		cpumask_set_cpu(cpu, &cpu_mask);
+}
+
+static int
+power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_FAILED:
+	case CPU_STARTING:
+		power_cpu_init(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		power_cpu_exit(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block power_cpu_notifier_nb = {
+	.notifier_call = power_cpu_notifier,
+	.priority = CPU_PRI_PERF,
+};
+
+static const struct x86_cpu_id cpu_match[] = {
+	{ .vendor = X86_VENDOR_AMD, .family = 0x15 },
+	{},
+};
+
+static int __init amd_power_pmu_init(void)
+{
+	int cpu, target, ret;
+
+	if (!x86_match_cpu(cpu_match))
+		return 0;
+
+	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
+		return -ENODEV;
+
+	cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
+
+	if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
+		pr_err("Failed to read max compute unit power accumulator MSR\n");
+		return -ENODEV;
+	}
+
+	cpu_notifier_register_begin();
+
+	/* Choose one online core of each compute unit. */
+	for_each_online_cpu(cpu) {
+		target = cpumask_first(topology_sibling_cpumask(cpu));
+		if (!cpumask_test_cpu(target, &cpu_mask))
+			cpumask_set_cpu(target, &cpu_mask);
+	}
+
+	ret = perf_pmu_register(&pmu_class, "power", -1);
+	if (WARN_ON(ret)) {
+		pr_warn("AMD Power PMU registration failed\n");
+		goto out;
+	}
+
+	__register_cpu_notifier(&power_cpu_notifier_nb);
+
+	pr_info("AMD Power PMU detected\n");
+
+out:
+	cpu_notifier_register_done();
+
+	return ret;
+}
+module_init(amd_power_pmu_init);
+
+static void __exit amd_power_pmu_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&power_cpu_notifier_nb);
+	cpu_notifier_register_done();
+
+	perf_pmu_unregister(&pmu_class);
+}
+module_exit(amd_power_pmu_exit);
+
+MODULE_AUTHOR("Huang Rui <ray.huang@....com>");
+MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
+MODULE_LICENSE("GPL v2");
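
[ Illustration only, not part of the patch: a minimal user-space sketch of
  counting the new power-pkg event via perf_event_open(). The "power" PMU
  name and the 0x01 event encoding follow the driver above; the sysfs path,
  CPU choice and minimal error handling are assumptions. ]

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long type = 0;
	uint64_t count = 0;
	FILE *f;
	int fd;

	/* The dynamic PMU type id is exported by the perf core in sysfs. */
	f = fopen("/sys/bus/event_source/devices/power/type", "r");
	if (!f || fscanf(f, "%llu", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x01;	/* power-pkg, see pmu_event_init() above */

	/* System-wide only PMU: pid == -1, a specific CPU, no sampling. */
	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	read(fd, &count, sizeof(count));
	printf("power-pkg count: %llu\n", (unsigned long long)count);
	close(fd);

	return 0;
}
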
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5e830d0c95c9..002b2eadd600 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1602,8 +1602,7 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
 	return new;
 }
 
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-			  char *page)
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
 		container_of(attr, struct perf_pmu_events_attr, attr);
@@ -1615,6 +1614,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 
 	return x86_pmu.events_sysfs_show(page, config);
 }
+EXPORT_SYMBOL_GPL(events_sysfs_show);
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 93cb412a5579..7b5fd811ef45 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -13,8 +13,16 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
+#define MBM_CNTR_WIDTH		24
+/*
+ * Guaranteed time in ms, per the SDM, within which the MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME	1000
+
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+static bool cqm_enabled, mbm_enabled;
+unsigned int mbm_socket_max;
 
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
@@ -42,8 +50,37 @@ struct intel_pqr_state {
  * interrupts disabled, which is sufficient for the protection.
  */
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
+/**
+ * struct sample - mbm event's (local or total) data
+ * @total_bytes:   #bytes since we began monitoring
+ * @prev_msr:      previous value of MSR
+ */
+struct sample {
+	u64	total_bytes;
+	u64	prev_msr;
+};
 
 /*
+ * samples profiled for total memory bandwidth type events
+ */
+static struct sample *mbm_total;
+/*
+ * samples profiled for local memory bandwidth type events
+ */
+static struct sample *mbm_local;
+
+#define pkg_id	topology_physical_package_id(smp_processor_id())
+/*
+ * rmid_2_index returns the index for the rmid in the mbm_local/mbm_total arrays.
+ * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
+ * rmids per socket; examples are given below:
+ * RMID1 of Socket0:  vrmid =  1
+ * RMID1 of Socket1:  vrmid =  1 * (cqm_max_rmid + 1) + 1
+ * RMID1 of Socket2:  vrmid =  2 * (cqm_max_rmid + 1) + 1
+ */
+#define rmid_2_index(rmid)  ((pkg_id * (cqm_max_rmid + 1)) + rmid)
+/*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
  *
@@ -65,9 +102,13 @@ static cpumask_t cqm_cpumask;
 #define RMID_VAL_ERROR		(1ULL << 63)
 #define RMID_VAL_UNAVAIL	(1ULL << 62)
 
-#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
-
-#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID	0x01
+#define QOS_MBM_TOTAL_EVENT_ID	0x02
+#define QOS_MBM_LOCAL_EVENT_ID	0x03
 
 /*
  * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -211,6 +252,21 @@ static void __put_rmid(u32 rmid)
 	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
 }
 
+static void cqm_cleanup(void)
+{
+	int i;
+
+	if (!cqm_rmid_ptrs)
+		return;
+
+	for (i = 0; i < cqm_max_rmid; i++)
+		kfree(cqm_rmid_ptrs[i]);
+
+	kfree(cqm_rmid_ptrs);
+	cqm_rmid_ptrs = NULL;
+	cqm_enabled = false;
+}
+
 static int intel_cqm_setup_rmid_cache(void)
 {
 	struct cqm_rmid_entry *entry;
@@ -218,7 +274,7 @@ static int intel_cqm_setup_rmid_cache(void)
 	int r = 0;
 
 	nr_rmids = cqm_max_rmid + 1;
-	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+	cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
 				nr_rmids, GFP_KERNEL);
 	if (!cqm_rmid_ptrs)
 		return -ENOMEM;
@@ -249,11 +305,9 @@ static int intel_cqm_setup_rmid_cache(void)
 	mutex_unlock(&cache_mutex);
 
 	return 0;
-fail:
-	while (r--)
-		kfree(cqm_rmid_ptrs[r]);
 
-	kfree(cqm_rmid_ptrs);
+fail:
+	cqm_cleanup();
 	return -ENOMEM;
 }
 
@@ -281,9 +335,13 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
 
 	/*
 	 * Events that target the same task are placed into the same cache group.
+	 * Mark it as a multi-event group, so that we update ->count
+	 * for every event rather than just the group leader later.
 	 */
-	if (a->hw.target == b->hw.target)
+	if (a->hw.target == b->hw.target) {
+		b->hw.is_group_event = true;
 		return true;
+	}
 
 	/*
 	 * Are we an inherited event?
@@ -392,10 +450,26 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 
 struct rmid_read {
 	u32 rmid;
+	u32 evt_type;
 	atomic64_t value;
 };
 
 static void __intel_cqm_event_count(void *info);
+static void init_mbm_sample(u32 rmid, u32 evt_type);
+static void __intel_mbm_event_count(void *info);
+
+static bool is_mbm_event(int e)
+{
+	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+}
+
+static void cqm_mask_call(struct rmid_read *rr)
+{
+	if (is_mbm_event(rr->evt_type))
+		on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
+	else
+		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
+}
 
 /*
  * Exchange the RMID of a group of events.
@@ -413,12 +487,12 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 	 */
 	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
 		struct rmid_read rr = {
-			.value = ATOMIC64_INIT(0),
 			.rmid = old_rmid,
+			.evt_type = group->attr.config,
+			.value = ATOMIC64_INIT(0),
 		};
 
-		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
-				 &rr, 1);
+		cqm_mask_call(&rr);
 		local64_set(&group->count, atomic64_read(&rr.value));
 	}
 
@@ -430,6 +504,22 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 
 	raw_spin_unlock_irq(&cache_lock);
 
+	/*
+	 * If the allocation is for mbm, init the mbm stats.
+	 * Need to check whether each event in the group is an mbm event
+	 * because there could be multiple types of events in the same group.
+	 */
+	if (__rmid_valid(rmid)) {
+		event = group;
+		if (is_mbm_event(event->attr.config))
+			init_mbm_sample(rmid, event->attr.config);
+
+		list_for_each_entry(event, head, hw.cqm_group_entry) {
+			if (is_mbm_event(event->attr.config))
+				init_mbm_sample(rmid, event->attr.config);
+		}
+	}
+
 	return old_rmid;
 }
 
@@ -837,6 +927,72 @@ static void intel_cqm_rmid_rotate(struct work_struct *work)
 	schedule_delayed_work(&intel_cqm_rmid_work, delay);
 }
 
+static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
+{
+	struct sample *mbm_current;
+	u32 vrmid = rmid_2_index(rmid);
+	u64 val, bytes, shift;
+	u32 eventid;
+
+	if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
+		mbm_current = &mbm_local[vrmid];
+		eventid     = QOS_MBM_LOCAL_EVENT_ID;
+	} else {
+		mbm_current = &mbm_total[vrmid];
+		eventid     = QOS_MBM_TOTAL_EVENT_ID;
+	}
+
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return mbm_current->total_bytes;
+
+	if (first) {
+		mbm_current->prev_msr = val;
+		mbm_current->total_bytes = 0;
+		return mbm_current->total_bytes;
+	}
+
+	/*
+	 * The h/w guarantees that counters will not overflow
+	 * so long as we poll them at least once per second.
+	 */
+	shift = 64 - MBM_CNTR_WIDTH;
+	bytes = (val << shift) - (mbm_current->prev_msr << shift);
+	bytes >>= shift;
+
+	bytes *= cqm_l3_scale;
+
+	mbm_current->total_bytes += bytes;
+	mbm_current->prev_msr = val;
+
+	return mbm_current->total_bytes;
+}
+
+static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
+{
+	return update_sample(rmid, evt_type, 0);
+}
+
+static void __intel_mbm_event_init(void *info)
+{
+	struct rmid_read *rr = info;
+
+	update_sample(rr->rmid, rr->evt_type, 1);
+}
+
+static void init_mbm_sample(u32 rmid, u32 evt_type)
+{
+	struct rmid_read rr = {
+		.rmid = rmid,
+		.evt_type = evt_type,
+		.value = ATOMIC64_INIT(0),
+	};
+
+	/* on each socket, init sample */
+	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
+
 /*
  * Find a group and setup RMID.
  *
@@ -849,6 +1005,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
 	bool conflict = false;
 	u32 rmid;
 
+	event->hw.is_group_event = false;
 	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
 		rmid = iter->hw.cqm_rmid;
 
@@ -856,6 +1013,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
 			/* All tasks in a group share an RMID */
 			event->hw.cqm_rmid = rmid;
 			*group = iter;
+			if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+				init_mbm_sample(rmid, event->attr.config);
 			return;
 		}
 
@@ -872,6 +1031,9 @@ static void intel_cqm_setup_event(struct perf_event *event,
 	else
 		rmid = __get_rmid();
 
+	if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+		init_mbm_sample(rmid, event->attr.config);
+
 	event->hw.cqm_rmid = rmid;
 }
 
@@ -893,7 +1055,10 @@ static void intel_cqm_event_read(struct perf_event *event)
 	if (!__rmid_valid(rmid))
 		goto out;
 
-	val = __rmid_read(rmid);
+	if (is_mbm_event(event->attr.config))
+		val = rmid_read_mbm(rmid, event->attr.config);
+	else
+		val = __rmid_read(rmid);
 
 	/*
 	 * Ignore this reading on error states and do not update the value.
@@ -924,10 +1089,100 @@ static inline bool cqm_group_leader(struct perf_event *event)
 	return !list_empty(&event->hw.cqm_groups_entry);
 }
 
+static void __intel_mbm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = rmid_read_mbm(rr->rmid, rr->evt_type);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+	atomic64_add(val, &rr->value);
+}
+
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *iter, *iter1;
+	int ret = HRTIMER_RESTART;
+	struct list_head *head;
+	unsigned long flags;
+	u32 grp_rmid;
+
+	/*
+	 * Need to take the cache_lock as the timer Event Select MSR reads
+	 * can race with the mbm/cqm count() and mbm_init() reads.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	if (list_empty(&cache_groups)) {
+		ret = HRTIMER_NORESTART;
+		goto out;
+	}
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		grp_rmid = iter->hw.cqm_rmid;
+		if (!__rmid_valid(grp_rmid))
+			continue;
+		if (is_mbm_event(iter->attr.config))
+			update_sample(grp_rmid, iter->attr.config, 0);
+
+		head = &iter->hw.cqm_group_entry;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+			if (!iter1->hw.is_group_event)
+				break;
+			if (is_mbm_event(iter1->attr.config))
+				update_sample(iter1->hw.cqm_rmid,
+					      iter1->attr.config, 0);
+		}
+	}
+
+	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return ret;
+}
+
+static void __mbm_start_timer(void *info)
+{
+	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+			     HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+	hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+	struct hrtimer *hr;
+	int i;
+
+	for (i = 0; i < mbm_socket_max; i++) {
+		hr = &mbm_timers[i];
+		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hr->function = mbm_hrtimer_handle;
+	}
+}
+
 static u64 intel_cqm_event_count(struct perf_event *event)
 {
 	unsigned long flags;
 	struct rmid_read rr = {
+		.evt_type = event->attr.config,
 		.value = ATOMIC64_INIT(0),
 	};
 
@@ -940,7 +1195,9 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 		return __perf_event_count(event);
 
 	/*
-	 * Only the group leader gets to report values. This stops us
+	 * Only the group leader gets to report values, except in the case
+	 * of multiple events in the same group, where we still need to read
+	 * the other events. This stops us
 	 * reporting duplicate values to userspace, and gives us a clear
 	 * rule for which task gets to report the values.
 	 *
@@ -948,7 +1205,7 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 	 * specific packages - we forfeit that ability when we create
 	 * task events.
 	 */
-	if (!cqm_group_leader(event))
+	if (!cqm_group_leader(event) && !event->hw.is_group_event)
 		return 0;
 
 	/*
@@ -975,7 +1232,7 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 	if (!__rmid_valid(rr.rmid))
 		goto out;
 
-	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+	cqm_mask_call(&rr);
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 	if (event->hw.cqm_rmid == rr.rmid)
@@ -1046,8 +1303,14 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
+	unsigned long flags;
 
 	mutex_lock(&cache_mutex);
+	/*
+	 * Hold the cache_lock as mbm timer handlers could be
+	 * scanning the list of events.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
 
 	/*
 	 * If there's another event in this group...
@@ -1079,6 +1342,14 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 		}
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	/*
+	 * Stop the mbm overflow timers when the last event is destroyed.
+	 */
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_stop_timers();
+
 	mutex_unlock(&cache_mutex);
 }
 
@@ -1086,11 +1357,13 @@ static int intel_cqm_event_init(struct perf_event *event)
 {
 	struct perf_event *group = NULL;
 	bool rotate = false;
+	unsigned long flags;
 
 	if (event->attr.type != intel_cqm_pmu.type)
 		return -ENOENT;
 
-	if (event->attr.config & ~QOS_EVENT_MASK)
+	if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
+	     (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
 	/* unsupported modes and filters */
@@ -1110,9 +1383,21 @@ static int intel_cqm_event_init(struct perf_event *event)
 
 	mutex_lock(&cache_mutex);
 
+	/*
+	 * Start the mbm overflow timers when the first event is created.
+	 */
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_start_timers();
+
 	/* Will also set rmid */
 	intel_cqm_setup_event(event, &group);
 
+	/*
+	 * Hold the cache_lock as mbm timer handlers could be
+	 * scanning the list of events.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
 	if (group) {
 		list_add_tail(&event->hw.cqm_group_entry,
 			      &group->hw.cqm_group_entry);
@@ -1131,6 +1416,7 @@ static int intel_cqm_event_init(struct perf_event *event)
 			rotate = true;
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
 	mutex_unlock(&cache_mutex);
 
 	if (rotate)
@@ -1145,6 +1431,16 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
 EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
 EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
 
+EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
+EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
+EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
+EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
+
+EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
+EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
+EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
+EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
+
 static struct attribute *intel_cqm_events_attr[] = {
 	EVENT_PTR(intel_cqm_llc),
 	EVENT_PTR(intel_cqm_llc_pkg),
@@ -1154,9 +1450,38 @@ static struct attribute *intel_cqm_events_attr[] = {
 	NULL,
 };
 
+static struct attribute *intel_mbm_events_attr[] = {
+	EVENT_PTR(intel_cqm_total_bytes),
+	EVENT_PTR(intel_cqm_local_bytes),
+	EVENT_PTR(intel_cqm_total_bytes_pkg),
+	EVENT_PTR(intel_cqm_local_bytes_pkg),
+	EVENT_PTR(intel_cqm_total_bytes_unit),
+	EVENT_PTR(intel_cqm_local_bytes_unit),
+	EVENT_PTR(intel_cqm_total_bytes_scale),
+	EVENT_PTR(intel_cqm_local_bytes_scale),
+	NULL,
+};
+
+static struct attribute *intel_cmt_mbm_events_attr[] = {
+	EVENT_PTR(intel_cqm_llc),
+	EVENT_PTR(intel_cqm_total_bytes),
+	EVENT_PTR(intel_cqm_local_bytes),
+	EVENT_PTR(intel_cqm_llc_pkg),
+	EVENT_PTR(intel_cqm_total_bytes_pkg),
+	EVENT_PTR(intel_cqm_local_bytes_pkg),
+	EVENT_PTR(intel_cqm_llc_unit),
+	EVENT_PTR(intel_cqm_total_bytes_unit),
+	EVENT_PTR(intel_cqm_local_bytes_unit),
+	EVENT_PTR(intel_cqm_llc_scale),
+	EVENT_PTR(intel_cqm_total_bytes_scale),
+	EVENT_PTR(intel_cqm_local_bytes_scale),
+	EVENT_PTR(intel_cqm_llc_snapshot),
+	NULL,
+};
+
 static struct attribute_group intel_cqm_events_group = {
 	.name = "events",
-	.attrs = intel_cqm_events_attr,
+	.attrs = NULL,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-7");
@@ -1303,12 +1628,70 @@ static const struct x86_cpu_id intel_cqm_match[] = {
 	{}
 };
 
+static void mbm_cleanup(void)
+{
+	if (!mbm_enabled)
+		return;
+
+	kfree(mbm_local);
+	kfree(mbm_total);
+	mbm_enabled = false;
+}
+
+static const struct x86_cpu_id intel_mbm_local_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
+	{}
+};
+
+static const struct x86_cpu_id intel_mbm_total_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
+	{}
+};
+
+static int intel_mbm_init(void)
+{
+	int ret = 0, array_size, maxid = cqm_max_rmid + 1;
+
+	mbm_socket_max = topology_max_packages();
+	array_size = sizeof(struct sample) * maxid * mbm_socket_max;
+	mbm_local = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_local)
+		return -ENOMEM;
+
+	mbm_total = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_total) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	array_size = sizeof(struct hrtimer) * mbm_socket_max;
+	mbm_timers = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_timers) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	mbm_hrtimer_init();
+
+out:
+	if (ret)
+		mbm_cleanup();
+
+	return ret;
+}
+
 static int __init intel_cqm_init(void)
 {
-	char *str, scale[20];
+	char *str = NULL, scale[20];
 	int i, cpu, ret;
 
-	if (!x86_match_cpu(intel_cqm_match))
+	if (x86_match_cpu(intel_cqm_match))
+		cqm_enabled = true;
+
+	if (x86_match_cpu(intel_mbm_local_match) &&
+	     x86_match_cpu(intel_mbm_total_match))
+		mbm_enabled = true;
+
+	if (!cqm_enabled && !mbm_enabled)
 		return -ENODEV;
 
 	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
@@ -1365,16 +1748,41 @@ static int __init intel_cqm_init(void)
 		cqm_pick_event_reader(i);
 	}
 
-	__perf_cpu_notifier(intel_cqm_cpu_notifier);
+	if (mbm_enabled)
+		ret = intel_mbm_init();
+	if (ret && !cqm_enabled)
+		goto out;
+
+	if (cqm_enabled && mbm_enabled)
+		intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
+	else if (!cqm_enabled && mbm_enabled)
+		intel_cqm_events_group.attrs = intel_mbm_events_attr;
+	else if (cqm_enabled && !mbm_enabled)
+		intel_cqm_events_group.attrs = intel_cqm_events_attr;
 
 	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-	if (ret)
+	if (ret) {
 		pr_err("Intel CQM perf registration failed: %d\n", ret);
-	else
+		goto out;
+	}
+
+	if (cqm_enabled)
 		pr_info("Intel CQM monitoring enabled\n");
+	if (mbm_enabled)
+		pr_info("Intel MBM enabled\n");
 
+	/*
+	 * Register the hot cpu notifier once we are sure cqm
+	 * is enabled to avoid notifier leak.
+	 */
+	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 out:
 	cpu_notifier_register_done();
+	if (ret) {
+		kfree(str);
+		cqm_cleanup();
+		mbm_cleanup();
+	}
 
 	return ret;
 }
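
[ Illustration only, not part of the patch: a standalone sketch of the
  24-bit MBM counter arithmetic used by update_sample() above. Shifting both
  the current and the previous MSR value up by (64 - MBM_CNTR_WIDTH) before
  subtracting lets a single hardware wraparound fall out of the unsigned
  arithmetic; the values below are made up. ]

#include <stdio.h>
#include <stdint.h>

#define MBM_CNTR_WIDTH	24

static uint64_t mbm_delta(uint64_t cur, uint64_t prev)
{
	unsigned int shift = 64 - MBM_CNTR_WIDTH;
	uint64_t bytes = (cur << shift) - (prev << shift);

	return bytes >> shift;
}

int main(void)
{
	/* The counter wrapped from near the 24-bit limit back to 0x10. */
	printf("delta: %llu\n",
	       (unsigned long long)mbm_delta(0x000010, 0xfffff0));	/* 32 */
	return 0;
}
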
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce7211a07c0b..8584b90d8e0b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -570,11 +570,12 @@ int intel_pmu_drain_bts_buffer(void)
 	 * We will overwrite the from and to address before we output
 	 * the sample.
 	 */
+	rcu_read_lock();
 	perf_prepare_sample(&header, &data, event, &regs);
 
 	if (perf_output_begin(&handle, event, header.size *
 			      (top - base - skip)))
-		return 1;
+		goto unlock;
 
 	for (at = base; at < top; at++) {
 		/* Filter out any records that contain kernel addresses. */
@@ -593,6 +594,8 @@ int intel_pmu_drain_bts_buffer(void)
 	/* There's new data available. */
 	event->hw.interrupts++;
 	event->pending_kill = POLL_IN;
+unlock:
+	rcu_read_unlock();
 	return 1;
 }
 
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b834a3f55a01..70c93f9b03ac 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -711,6 +711,7 @@ static int __init rapl_pmu_init(void)
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
 	case 63: /* Haswell-Server */
+	case 79: /* Broadwell-Server */
 		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_SRV;
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
@@ -718,6 +719,7 @@ static int __init rapl_pmu_init(void)
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 	case 61: /* Broadwell */
+	case 71: /* Broadwell-H */
 		rapl_cntr_mask = RAPL_IDX_HSW;
 		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
 		break;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 93f6bd9bf761..ab2bcaaebe38 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -46,7 +46,6 @@
 				(SNBEP_PMON_CTL_EV_SEL_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PMON_CTL_INVERT | \
 				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -148,7 +147,6 @@
 /* IVBEP PCU */
 #define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\
 				(SNBEP_PMON_CTL_EV_SEL_MASK | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
 				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
@@ -258,7 +256,6 @@
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
 				 SNBEP_CBO_PMON_CTL_TID_EN | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PMON_CTL_INVERT | \
 				 KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -472,7 +469,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
 };
 
 static struct attribute *snbep_uncore_pcu_formats_attr[] = {
-	&format_attr_event_ext.attr,
+	&format_attr_event.attr,
 	&format_attr_occ_sel.attr,
 	&format_attr_edge.attr,
 	&format_attr_inv.attr,
@@ -1313,7 +1310,7 @@ static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
 };
 
 static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
-	&format_attr_event_ext.attr,
+	&format_attr_event.attr,
 	&format_attr_occ_sel.attr,
 	&format_attr_edge.attr,
 	&format_attr_thresh5.attr,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 074b7604bd51..44ebd04878eb 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -94,7 +94,7 @@
 #define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
 #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_ACC_POWER	( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
 #define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
@@ -245,6 +245,8 @@
 
 /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
 #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b39338c4b260..68fe8d3bed56 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -306,7 +306,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 		u32 eax, ebx, ecx, edx;
 
 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		nodes_per_socket = ((ecx >> 8) & 7) + 1;
 		node_id = ecx & 7;
 
 		/* get compute unit information */
@@ -317,7 +316,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 		u64 value;
 
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
-		nodes_per_socket = ((value >> 3) & 7) + 1;
 		node_id = value & 7;
 	} else
 		return;
@@ -519,6 +517,18 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_MWAITX))
 		use_mwaitx_delay();
+
+	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 ecx;
+
+		ecx = cpuid_ecx(0x8000001e);
+		nodes_per_socket = ((ecx >> 8) & 7) + 1;
+	} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+		u64 value;
+
+		rdmsrl(MSR_FAM10H_NODE_ID, value);
+		nodes_per_socket = ((value >> 3) & 7) + 1;
+	}
 }
 
 static void early_init_amd(struct cpuinfo_x86 *c)
@@ -536,6 +546,10 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 			set_sched_clock_stable();
 	}
 
+	/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+	if (c->x86_power & BIT(12))
+		set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 #else
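
[ Illustration only, x86 user space: the detection the hunk above wires up
  in the kernel. Bit 12 of CPUID Fn8000_0007 EDX advertises the accumulated
  power mechanism that X86_FEATURE_ACC_POWER now mirrors. ]

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("ACC_POWER: %s\n", (edx & (1u << 12)) ? "yes" : "no");
	return 0;
}
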
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 62590aa064c8..e601c1286e29 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -649,7 +649,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 			cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
 			c->x86_capability[CPUID_F_1_EDX] = edx;
 
-			if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+			if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
+			      ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
+			       (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
 				c->x86_cache_max_rmid = ecx;
 				c->x86_cache_occ_scale = ebx;
 			}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 79ec7bbf0155..15588d4c581d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -121,6 +121,7 @@ struct hw_perf_event {
 		struct { /* intel_cqm */
 			int			cqm_state;
 			u32			cqm_rmid;
+			int			is_group_event;
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
@@ -128,6 +129,10 @@ struct hw_perf_event {
 		struct { /* itrace */
 			int			itrace_started;
 		};
+		struct { /* amd_power */
+			u64	pwr_acc;
+			u64	ptsc;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 			/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 712570dddacd..de24fbce5277 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -376,8 +376,11 @@ static void update_perf_cpu_limits(void)
 	u64 tmp = perf_sample_period_ns;
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
-	do_div(tmp, 100);
-	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+	tmp = div_u64(tmp, 100);
+	if (!tmp)
+		tmp = 1;
+
+	WRITE_ONCE(perf_sample_allowed_ns, tmp);
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -409,7 +412,13 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 	if (ret || !write)
 		return ret;
 
-	update_perf_cpu_limits();
+	if (sysctl_perf_cpu_time_max_percent == 100) {
+		printk(KERN_WARNING
+		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
+		WRITE_ONCE(perf_sample_allowed_ns, 0);
+	} else {
+		update_perf_cpu_limits();
+	}
 
 	return 0;
 }
@@ -423,62 +432,68 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 #define NR_ACCUMULATED_SAMPLES 128
 static DEFINE_PER_CPU(u64, running_sample_length);
 
+static u64 __report_avg;
+static u64 __report_allowed;
+
 static void perf_duration_warn(struct irq_work *w)
 {
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
-	u64 avg_local_sample_len;
-	u64 local_samples_len;
-
-	local_samples_len = __this_cpu_read(running_sample_length);
-	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
 	printk_ratelimited(KERN_WARNING
-			"perf interrupt took too long (%lld > %lld), lowering "
-			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns >> 1,
-			sysctl_perf_event_sample_rate);
+		"perf: interrupt took too long (%lld > %lld), lowering "
+		"kernel.perf_event_max_sample_rate to %d\n",
+		__report_avg, __report_allowed,
+		sysctl_perf_event_sample_rate);
 }
 
 static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
-	u64 avg_local_sample_len;
-	u64 local_samples_len;
+	u64 max_len = READ_ONCE(perf_sample_allowed_ns);
+	u64 running_len;
+	u64 avg_len;
+	u32 max;
 
-	if (allowed_ns == 0)
+	if (max_len == 0)
 		return;
 
-	/* decay the counter by 1 average sample */
-	local_samples_len = __this_cpu_read(running_sample_length);
-	local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
-	local_samples_len += sample_len_ns;
-	__this_cpu_write(running_sample_length, local_samples_len);
+	/* Decay the counter by 1 average sample. */
+	running_len = __this_cpu_read(running_sample_length);
+	running_len -= running_len/NR_ACCUMULATED_SAMPLES;
+	running_len += sample_len_ns;
+	__this_cpu_write(running_sample_length, running_len);
 
 	/*
-	 * note: this will be biased artifically low until we have
-	 * seen NR_ACCUMULATED_SAMPLES.  Doing it this way keeps us
+	 * Note: this will be biased artificially low until we have
+	 * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
 	 * from having to maintain a count.
 	 */
-	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
-	if (avg_local_sample_len <= allowed_ns)
+	avg_len = running_len/NR_ACCUMULATED_SAMPLES;
+	if (avg_len <= max_len)
 		return;
 
-	if (max_samples_per_tick <= 1)
-		return;
+	__report_avg = avg_len;
+	__report_allowed = max_len;
 
-	max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
-	sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
-	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
+	/*
+	 * Compute a throttle threshold 25% below the current duration.
+	 */
+	avg_len += avg_len / 4;
+	max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
+	if (avg_len < max)
+		max /= (u32)avg_len;
+	else
+		max = 1;
 
-	update_perf_cpu_limits();
+	WRITE_ONCE(perf_sample_allowed_ns, avg_len);
+	WRITE_ONCE(max_samples_per_tick, max);
+
+	sysctl_perf_event_sample_rate = max * HZ;
+	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	if (!irq_work_queue(&perf_duration_work)) {
-		early_printk("perf interrupt took too long (%lld > %lld), lowering "
+		early_printk("perf: interrupt took too long (%lld > %lld), lowering "
 			     "kernel.perf_event_max_sample_rate to %d\n",
-			     avg_local_sample_len, allowed_ns >> 1,
+			     __report_avg, __report_allowed,
 			     sysctl_perf_event_sample_rate);
 	}
 }
@@ -4210,6 +4225,14 @@ static void __perf_event_period(struct perf_event *event,
 	active = (event->state == PERF_EVENT_STATE_ACTIVE);
 	if (active) {
 		perf_pmu_disable(ctx->pmu);
+		/*
+		 * We could be throttled; unthrottle now to avoid the tick
+		 * trying to unthrottle while we already re-started the event.
+		 */
+		if (event->hw.interrupts == MAX_INTERRUPTS) {
+			event->hw.interrupts = 0;
+			perf_log_throttle(event, 1);
+		}
 		event->pmu->stop(event, PERF_EF_UPDATE);
 	}
 
@@ -9426,10 +9449,29 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
+		/*
+		 * This must be done before the CPU comes alive, because the
+		 * moment we can run tasks we can encounter (software) events.
+		 *
+		 * Specifically, someone can have inherited events on kthreadd
+		 * or a pre-existing worker thread that gets re-bound.
+		 */
 		perf_event_init_cpu(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
+		/*
+		 * This must be done before the CPU dies because after that an
+		 * active event might want to IPI the CPU and that'll not work
+		 * so great for dead CPUs.
+		 *
+	 * XXX smp_call_function_single() returns -ENXIO without a warning,
+		 * so we could possibly deal with this.
+		 *
+		 * This is safe against new events arriving because
+		 * sys_perf_event_open() serializes against hotplug using
+		 * get_online_cpus().
+		 */
 		perf_event_exit_cpu(cpu);
 		break;
 	default:
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 1faad2cfdb9e..c61f0cbd308b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -746,8 +746,10 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 
 	rb->user_page = all_buf;
 	rb->data_pages[0] = all_buf + PAGE_SIZE;
-	rb->page_order = ilog2(nr_pages);
-	rb->nr_pages = !!nr_pages;
+	if (nr_pages) {
+		rb->nr_pages = 1;
+		rb->page_order = ilog2(nr_pages);
+	}
 
 	ring_buffer_init(rb, watermark, flags);
 
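
In this rb_alloc() variant the user page and the data pages come out of a single allocation, and nr_pages may legitimately be 0 (user page only). The old code still evaluated ilog2(nr_pages) in that case, whose result for 0 is not defined, so rb->page_order could end up bogus even though rb->nr_pages was forced to 0. A quick user-space illustration of the guard (ilog2_u32() below is a stand-in helper, not the kernel macro):

#include <stdio.h>

static int ilog2_u32(unsigned int v)
{
	return 31 - __builtin_clz(v);	/* __builtin_clz(0) is undefined, like ilog2(0) */
}

int main(void)
{
	unsigned int nr_pages = 0;		/* the user-page-only case */
	int page_order = 0, have_pages = 0;

	if (nr_pages) {				/* same guard as in rb_alloc() above */
		have_pages = 1;
		page_order = ilog2_u32(nr_pages);
	}
	printf("nr_pages=%u have_pages=%d page_order=%d\n",
	       nr_pages, have_pages, page_order);
	return 0;
}
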
diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h
new file mode 100644
index 000000000000..841cec8ed525
--- /dev/null
+++ b/tools/include/linux/stringify.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_STRINGIFY_H
+#define __LINUX_STRINGIFY_H
+
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
+#endif	/* !__LINUX_STRINGIFY_H */
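
The point of the two-level macro is that its argument gets macro-expanded before the '#' operator is applied, so __stringify(SOME_MACRO) yields the macro's value rather than its name -- which is what the mfspr() asm string in the powerpc header change below relies on. Self-contained sketch (the PVR value is just an example number):

#include <stdio.h>

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define STR_DIRECT(x)		#x	/* single level, for contrast */
#define PVR			287	/* assumed example value */

int main(void)
{
	printf("direct:   %s\n", STR_DIRECT(PVR));	/* prints "PVR" */
	printf("indirect: %s\n", __stringify(PVR));	/* prints "287" */
	return 0;
}
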
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index bbc82c614bee..316f308a63ea 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak		# QUIET_CLEAN
+include ../../scripts/utilities.mak		# QUIET_CLEAN
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 629cf8c14e68..25b3f69f4364 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak		# QUIET_CLEAN
+include ../../scripts/utilities.mak		# QUIET_CLEAN
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 190cc886ab91..a8b6357d1ffe 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5427,10 +5427,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 	}
 
 	if (pevent->latency_format) {
-		trace_seq_printf(s, " %3d", record->cpu);
 		pevent_data_lat_fmt(pevent, s, record);
-	} else
-		trace_seq_printf(s, " [%03d]", record->cpu);
+	}
 
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 3ba1c0b09908..098cfb9ca8f0 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../config/utilities.mak
+include ../../scripts/utilities.mak
 
 MAN1_TXT= \
 	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 79483f40e991..ec723d0a5bb3 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -40,10 +40,12 @@ The 'p' modifier can be used for specifying how precise the instruction
  0 - SAMPLE_IP can have arbitrary skid
  1 - SAMPLE_IP must have constant skid
  2 - SAMPLE_IP requested to have 0 skid
- 3 - SAMPLE_IP must have 0 skid
+ 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid
+     sample shadowing effects.
 
 For Intel systems precise event sampling is implemented with PEBS
-which supports up to precise-level 2.
+which supports up to precise-level 2, and precise-level 3 for
+some special cases.
 
 On AMD systems it is implemented using IBS (up to precise-level 2).
 The precise modifier works with event types 0x76 (cpu-cycles, CPU
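
At the syscall level each extra 'p' in the modifier just bumps the two-bit precise_ip field of struct perf_event_attr, so :ppp corresponds to precise_ip == 3. A stand-alone sketch (not perf's own fallback logic) that asks for the most precise level available and degrades one step at a time:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int precise;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.exclude_kernel = 1;		/* keep it usable without privileges */

	for (precise = 3; precise >= 0; precise--) {
		long fd;

		attr.precise_ip = precise;	/* 3 == the cycles:ppp case above */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd >= 0) {
			printf("opened cycles with precise_ip=%d (fd %ld)\n",
			       precise, fd);
			return 0;
		}
	}
	perror("perf_event_open");
	return 1;
}
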
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4a4fad4182f5..000ea210389d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -3,7 +3,7 @@ include ../scripts/Makefile.include
 # The default target of this Makefile is...
 all:
 
-include config/utilities.mak
+include ../scripts/utilities.mak
 
 # Define V to have a more verbose compile.
 #
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 6c1b8a75db09..6138bdef6e63 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -3,9 +3,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
-#include "../../util/header.h"
-#include "../../util/util.h"
+#include <linux/stringify.h>
 
 #define mfspr(rn)       ({unsigned long rval; \
 			 asm volatile("mfspr %0," __stringify(rn) \
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a50df86f2b9b..579a592990dd 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -25,19 +25,17 @@
 # endif
 #endif
 
-extern int bench_numa(int argc, const char **argv, const char *prefix);
-extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
-extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
-extern int bench_mem_memcpy(int argc, const char **argv,
-			    const char *prefix __maybe_unused);
-extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
-extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake_parallel(int argc, const char **argv,
-				     const char *prefix);
-extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+int bench_numa(int argc, const char **argv, const char *prefix);
+int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
+int bench_mem_memset(int argc, const char **argv, const char *prefix);
+int bench_futex_hash(int argc, const char **argv, const char *prefix);
+int bench_futex_wake(int argc, const char **argv, const char *prefix);
+int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
+int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 /* pi futexes */
-extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
+int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 57b4ed871459..5aad2a9408b0 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -2,7 +2,7 @@
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMCPY_FN(fn, name, desc)		\
-	extern void *fn(void *, const void *, size_t);
+	void *fn(void *, const void *, size_t);
 
 #include "mem-memcpy-x86-64-asm-def.h"
 
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 633800cb0dcb..0d15786d9ae3 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -2,7 +2,7 @@
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMSET_FN(fn, name, desc)		\
-	extern void *fn(void *, int, size_t);
+	void *fn(void *, int, size_t);
 
 #include "mem-memset-x86-64-asm-def.h"
 
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5049d6357a46..7500d959d7eb 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -293,7 +293,7 @@ static void bind_to_memnode(int node)
 	if (node == -1)
 		return;
 
-	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
 	nodemask = 1L << node;
 
 	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
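
The assertion was comparing a node count against sizeof(nodemask), i.e. a size in bytes, while the mask can hold sizeof(nodemask) * 8 node bits -- the same unit set_mempolicy() is given right below it. Tiny sketch of the difference:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long nodemask = 0;
	int node = 40;		/* example node id: fits in 64 bits, not in "8" */

	assert(node < (int)sizeof(nodemask) * 8);	/* 40 < 64 on LP64 */
	nodemask |= 1UL << node;
	printf("bytes=%zu bits=%zu mask=%#lx\n",
	       sizeof(nodemask), sizeof(nodemask) * 8, nodemask);
	return 0;
}
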
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cfe366375c4b..814158393656 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -94,7 +94,7 @@ static int process_sample_event(struct perf_tool *tool,
 	struct addr_location al;
 	int ret = 0;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 4d72359fd15a..8053a8ceefda 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -330,7 +330,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 	struct hists *hists = evsel__hists(evsel);
 	int ret = -1;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 49d55e21b1b0..bc1de9b8fd67 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -106,12 +106,14 @@ static void exec_woman_emacs(const char *path, const char *page)
 
 	if (!check_emacsclient_version()) {
 		/* This works only with emacsclient version >= 22. */
-		struct strbuf man_page = STRBUF_INIT;
+		char *man_page;
 
 		if (!path)
 			path = "emacsclient";
-		strbuf_addf(&man_page, "(woman \"%s\")", page);
-		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+		if (asprintf(&man_page, "(woman \"%s\")", page) > 0) {
+			execlp(path, "emacsclient", "-e", man_page, NULL);
+			free(man_page);
+		}
 		warning("failed to exec '%s': %s", path,
 			strerror_r(errno, sbuf, sizeof(sbuf)));
 	}
@@ -122,7 +124,7 @@ static void exec_man_konqueror(const char *path, const char *page)
 	const char *display = getenv("DISPLAY");
 
 	if (display && *display) {
-		struct strbuf man_page = STRBUF_INIT;
+		char *man_page;
 		const char *filename = "kfmclient";
 		char sbuf[STRERR_BUFSIZE];
 
@@ -141,8 +143,10 @@ static void exec_man_konqueror(const char *path, const char *page)
 				filename = file;
 		} else
 			path = "kfmclient";
-		strbuf_addf(&man_page, "man:%s(1)", page);
-		execlp(path, filename, "newTab", man_page.buf, NULL);
+		if (asprintf(&man_page, "man:%s(1)", page) > 0) {
+			execlp(path, filename, "newTab", man_page, NULL);
+			free(man_page);
+		}
 		warning("failed to exec '%s': %s", path,
 			strerror_r(errno, sbuf, sizeof(sbuf)));
 	}
@@ -161,11 +165,13 @@ static void exec_man_man(const char *path, const char *page)
 
 static void exec_man_cmd(const char *cmd, const char *page)
 {
-	struct strbuf shell_cmd = STRBUF_INIT;
 	char sbuf[STRERR_BUFSIZE];
+	char *shell_cmd;
 
-	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
-	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+	if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
+		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
+		free(shell_cmd);
+	}
 	warning("failed to exec '%s': %s", cmd,
 		strerror_r(errno, sbuf, sizeof(sbuf)));
 }
@@ -299,43 +305,33 @@ static int is_perf_command(const char *s)
 		is_in_cmdlist(&other_cmds, s);
 }
 
-static const char *prepend(const char *prefix, const char *cmd)
-{
-	size_t pre_len = strlen(prefix);
-	size_t cmd_len = strlen(cmd);
-	char *p = malloc(pre_len + cmd_len + 1);
-	memcpy(p, prefix, pre_len);
-	strcpy(p + pre_len, cmd);
-	return p;
-}
-
 static const char *cmd_to_page(const char *perf_cmd)
 {
+	char *s;
+
 	if (!perf_cmd)
 		return "perf";
 	else if (!prefixcmp(perf_cmd, "perf"))
 		return perf_cmd;
-	else
-		return prepend("perf-", perf_cmd);
+
+	return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
 }
 
 static void setup_man_path(void)
 {
-	struct strbuf new_path = STRBUF_INIT;
+	char *new_path;
 	const char *old_path = getenv("MANPATH");
 
 	/* We should always put ':' after our path. If there is no
 	 * old_path, the ':' at the end will let 'man' to try
 	 * system-wide paths after ours to find the manual page. If
 	 * there is old_path, we need ':' as delimiter. */
-	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
-	strbuf_addch(&new_path, ':');
-	if (old_path)
-		strbuf_addstr(&new_path, old_path);
-
-	setenv("MANPATH", new_path.buf, 1);
-
-	strbuf_release(&new_path);
+	if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) {
+		setenv("MANPATH", new_path, 1);
+		free(new_path);
+	} else {
+		error("Unable to setup man path");
+	}
 }
 
 static void exec_viewer(const char *name, const char *page)
@@ -380,7 +376,7 @@ static int show_info_page(const char *perf_cmd)
 	return -1;
 }
 
-static int get_html_page_path(struct strbuf *page_path, const char *page)
+static int get_html_page_path(char **page_path, const char *page)
 {
 	struct stat st;
 	const char *html_path = system_path(PERF_HTML_PATH);
@@ -392,10 +388,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page)
 		return -1;
 	}
 
-	strbuf_init(page_path, 0);
-	strbuf_addf(page_path, "%s/%s.html", html_path, page);
-
-	return 0;
+	return asprintf(page_path, "%s/%s.html", html_path, page);
 }
 
 /*
@@ -413,12 +406,12 @@ static void open_html(const char *path)
 static int show_html_page(const char *perf_cmd)
 {
 	const char *page = cmd_to_page(perf_cmd);
-	struct strbuf page_path; /* it leaks but we exec bellow */
+	char *page_path; /* it leaks but we exec below */
 
-	if (get_html_page_path(&page_path, page) != 0)
+	if (get_html_page_path(&page_path, page) < 0)
 		return -1;
 
-	open_html(page_path.buf);
+	open_html(page_path);
 
 	return 0;
 }
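
The pattern used throughout the conversion above is plain asprintf(3): format into a freshly allocated buffer, use it, free it. asprintf() returns the number of bytes printed or -1 on failure, in which case the pointer's contents are undefined and must not be freed; the hunks above additionally treat an empty result ("> 0") as failure. A bare-bones version of the idiom with an assumed page name:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *page = "perf-record";	/* example input */
	char *man_page;

	if (asprintf(&man_page, "(woman \"%s\")", page) < 0)
		return 1;

	puts(man_page);		/* the tool hands this to execlp() instead */
	free(man_page);
	return 0;
}
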
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7fa68663ed72..d1a2d104f2bc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -131,8 +131,7 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
 
 static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 				       union perf_event *event,
-				       struct perf_session *session
-				       __maybe_unused)
+				       struct perf_session *session)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject,
 						  tool);
@@ -417,9 +416,6 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 {
 	struct addr_location al;
 	struct thread *thread;
-	u8 cpumode;
-
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 	if (thread == NULL) {
@@ -428,7 +424,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 		goto repipe;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 88aeac9aa1da..85db3be4b3cb 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -131,7 +131,7 @@ dump_raw_samples(struct perf_tool *tool,
 	struct addr_location al;
 	const char *fmt;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 7eea49f9ed46..160ea23b45aa 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -41,6 +41,7 @@
 
 #include <dlfcn.h>
 #include <linux/bitmap.h>
+#include <linux/stringify.h>
 
 struct report {
 	struct perf_tool	tool;
@@ -154,7 +155,7 @@ static int process_sample_event(struct perf_tool *tool,
 	};
 	int ret = 0;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 57f9a7e7f7d3..3770c3dffe5e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -405,9 +405,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	return 0;
 }
 
-static void print_sample_iregs(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
+static void print_sample_iregs(struct perf_sample *sample,
 			  struct perf_event_attr *attr)
 {
 	struct regs_dump *regs = &sample->intr_regs;
@@ -476,10 +474,7 @@ mispred_str(struct branch_entry *br)
 	return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
-			  struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstack(struct perf_sample *sample)
 {
 	struct branch_stack *br = sample->branch_stack;
 	u64 i;
@@ -498,14 +493,11 @@ static void print_sample_brstack(union perf_event *event __maybe_unused,
 	}
 }
 
-static void print_sample_brstacksym(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
-			  struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstacksym(struct perf_sample *sample,
+				    struct thread *thread)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct addr_location alf, alt;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	u64 i, from, to;
 
 	if (!(br && br->nr))
@@ -518,11 +510,11 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused,
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
 		if (alf.map)
 			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
 
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
 		if (alt.map)
 			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
 
@@ -538,8 +530,7 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused,
 }
 
 
-static void print_sample_addr(union perf_event *event,
-			  struct perf_sample *sample,
+static void print_sample_addr(struct perf_sample *sample,
 			  struct thread *thread,
 			  struct perf_event_attr *attr)
 {
@@ -550,7 +541,7 @@ static void print_sample_addr(union perf_event *event,
 	if (!sample_addr_correlates_sym(attr))
 		return;
 
-	perf_event__preprocess_sample_addr(event, sample, thread, &al);
+	thread__resolve(thread, &al, sample);
 
 	if (PRINT_FIELD(SYM)) {
 		printf(" ");
@@ -567,8 +558,7 @@ static void print_sample_addr(union perf_event *event,
 	}
 }
 
-static void print_sample_bts(union perf_event *event,
-			     struct perf_sample *sample,
+static void print_sample_bts(struct perf_sample *sample,
 			     struct perf_evsel *evsel,
 			     struct thread *thread,
 			     struct addr_location *al)
@@ -598,7 +588,7 @@ static void print_sample_bts(union perf_event *event,
 	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
 	     !output[attr->type].user_set)) {
 		printf(" => ");
-		print_sample_addr(event, sample, thread, attr);
+		print_sample_addr(sample, thread, attr);
 	}
 
 	if (print_srcline_last)
@@ -747,7 +737,7 @@ static size_t data_src__printf(u64 data_src)
 	return printf("%-*s", maxlen, out);
 }
 
-static void process_event(struct perf_script *script, union perf_event *event,
+static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al)
 {
@@ -776,7 +766,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
 		print_sample_flags(sample->flags);
 
 	if (is_bts_event(attr)) {
-		print_sample_bts(event, sample, evsel, thread, al);
+		print_sample_bts(sample, evsel, thread, al);
 		return;
 	}
 
@@ -784,7 +774,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
 		event_format__print(evsel->tp_format, sample->cpu,
 				    sample->raw_data, sample->raw_size);
 	if (PRINT_FIELD(ADDR))
-		print_sample_addr(event, sample, thread, attr);
+		print_sample_addr(sample, thread, attr);
 
 	if (PRINT_FIELD(DATA_SRC))
 		data_src__printf(sample->data_src);
@@ -804,12 +794,12 @@ static void process_event(struct perf_script *script, union perf_event *event,
 	}
 
 	if (PRINT_FIELD(IREGS))
-		print_sample_iregs(event, sample, thread, attr);
+		print_sample_iregs(sample, attr);
 
 	if (PRINT_FIELD(BRSTACK))
-		print_sample_brstack(event, sample, thread, attr);
+		print_sample_brstack(sample);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		print_sample_brstacksym(event, sample, thread, attr);
+		print_sample_brstacksym(sample, thread);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
@@ -905,7 +895,7 @@ static int process_sample_event(struct perf_tool *tool,
 		return 0;
 	}
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
 		return -1;
@@ -920,7 +910,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (scripting_ops)
 		scripting_ops->process_event(event, sample, evsel, &al);
 	else
-		process_event(scr, event, sample, evsel, &al);
+		process_event(scr, sample, evsel, &al);
 
 out_put:
 	addr_location__put(&al);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index bd7a7757176f..40cc9bb3506c 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -489,7 +489,7 @@ static const char *cat_backtrace(union perf_event *event,
 	if (!chain)
 		goto exit;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		goto exit;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 94af190f6843..833214979c4f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -67,6 +67,7 @@
 #include <sys/utsname.h>
 #include <sys/mman.h>
 
+#include <linux/stringify.h>
 #include <linux/types.h>
 
 static volatile int done;
@@ -728,7 +729,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		top->exact_samples++;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
+	if (machine__resolve(machine, &al, sample) < 0)
 		return;
 
 	if (!top->kptr_restrict_warned &&
@@ -809,7 +810,6 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
-	u8 origin;
 	int ret;
 
 	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
@@ -822,12 +822,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
 		assert(evsel != NULL);
 
-		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
 		if (event->header.type == PERF_RECORD_SAMPLE)
 			++top->samples;
 
-		switch (origin) {
+		switch (sample.cpumode) {
 		case PERF_RECORD_MISC_USER:
 			++top->us_samples;
 			if (top->hide_user_symbols)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8dc98c598b1a..93ac724fb635 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2256,11 +2256,10 @@ static void print_location(FILE *f, struct perf_sample *sample,
 
 static int trace__pgfault(struct trace *trace,
 			  struct perf_evsel *evsel,
-			  union perf_event *event,
+			  union perf_event *event __maybe_unused,
 			  struct perf_sample *sample)
 {
 	struct thread *thread;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct addr_location al;
 	char map_type = 'd';
 	struct thread_trace *ttrace;
@@ -2279,7 +2278,7 @@ static int trace__pgfault(struct trace *trace,
 	if (trace->summary_only)
 		goto out;
 
-	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
@@ -2292,11 +2291,11 @@ static int trace__pgfault(struct trace *trace,
 
 	fprintf(trace->output, "] => ");
 
-	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
+	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
 				   sample->addr, &al);
 
 	if (!al.map) {
-		thread__find_addr_location(thread, cpumode,
+		thread__find_addr_location(thread, sample->cpumode,
 					   MAP__FUNCTION, sample->addr, &al);
 
 		if (al.map)
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3f871b54e261..41c24010ab43 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -7,38 +7,38 @@
 extern const char perf_usage_string[];
 extern const char perf_more_info_string[];
 
-extern void list_common_cmds_help(void);
-extern const char *help_unknown_cmd(const char *cmd);
-extern void prune_packed_objects(int);
-extern int read_line_with_nul(char *buf, int size, FILE *file);
-extern int check_pager_config(const char *cmd);
+void list_common_cmds_help(void);
+const char *help_unknown_cmd(const char *cmd);
+void prune_packed_objects(int);
+int read_line_with_nul(char *buf, int size, FILE *file);
+int check_pager_config(const char *cmd);
 
-extern int cmd_annotate(int argc, const char **argv, const char *prefix);
-extern int cmd_bench(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
-extern int cmd_config(int argc, const char **argv, const char *prefix);
-extern int cmd_diff(int argc, const char **argv, const char *prefix);
-extern int cmd_evlist(int argc, const char **argv, const char *prefix);
-extern int cmd_help(int argc, const char **argv, const char *prefix);
-extern int cmd_sched(int argc, const char **argv, const char *prefix);
-extern int cmd_list(int argc, const char **argv, const char *prefix);
-extern int cmd_record(int argc, const char **argv, const char *prefix);
-extern int cmd_report(int argc, const char **argv, const char *prefix);
-extern int cmd_stat(int argc, const char **argv, const char *prefix);
-extern int cmd_timechart(int argc, const char **argv, const char *prefix);
-extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_script(int argc, const char **argv, const char *prefix);
-extern int cmd_version(int argc, const char **argv, const char *prefix);
-extern int cmd_probe(int argc, const char **argv, const char *prefix);
-extern int cmd_kmem(int argc, const char **argv, const char *prefix);
-extern int cmd_lock(int argc, const char **argv, const char *prefix);
-extern int cmd_kvm(int argc, const char **argv, const char *prefix);
-extern int cmd_test(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
-extern int cmd_inject(int argc, const char **argv, const char *prefix);
-extern int cmd_mem(int argc, const char **argv, const char *prefix);
-extern int cmd_data(int argc, const char **argv, const char *prefix);
+int cmd_annotate(int argc, const char **argv, const char *prefix);
+int cmd_bench(int argc, const char **argv, const char *prefix);
+int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
+int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_diff(int argc, const char **argv, const char *prefix);
+int cmd_evlist(int argc, const char **argv, const char *prefix);
+int cmd_help(int argc, const char **argv, const char *prefix);
+int cmd_sched(int argc, const char **argv, const char *prefix);
+int cmd_list(int argc, const char **argv, const char *prefix);
+int cmd_record(int argc, const char **argv, const char *prefix);
+int cmd_report(int argc, const char **argv, const char *prefix);
+int cmd_stat(int argc, const char **argv, const char *prefix);
+int cmd_timechart(int argc, const char **argv, const char *prefix);
+int cmd_top(int argc, const char **argv, const char *prefix);
+int cmd_script(int argc, const char **argv, const char *prefix);
+int cmd_version(int argc, const char **argv, const char *prefix);
+int cmd_probe(int argc, const char **argv, const char *prefix);
+int cmd_kmem(int argc, const char **argv, const char *prefix);
+int cmd_lock(int argc, const char **argv, const char *prefix);
+int cmd_kvm(int argc, const char **argv, const char *prefix);
+int cmd_test(int argc, const char **argv, const char *prefix);
+int cmd_trace(int argc, const char **argv, const char *prefix);
+int cmd_inject(int argc, const char **argv, const char *prefix);
+int cmd_mem(int argc, const char **argv, const char *prefix);
+int cmd_data(int argc, const char **argv, const char *prefix);
 
-extern int find_scripts(char **scripts_array, char **scripts_path_array);
+int find_scripts(char **scripts_array, char **scripts_path_array);
 #endif
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index eca6a912e8c2..f7d7f5a1cad5 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -109,7 +109,7 @@ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
 endif
 
-include $(src-perf)/config/utilities.mak
+include $(srctree)/tools/scripts/utilities.mak
 
 ifeq ($(call get-executable,$(FLEX)),)
   dummy := $(error Error: $(FLEX) is missing on this system, please install it)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index afc9ad0a0515..abd3f0ec0c0b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -293,7 +293,6 @@ static int process_sample_event(struct machine *machine,
 {
 	struct perf_sample sample;
 	struct thread *thread;
-	u8 cpumode;
 	int ret;
 
 	if (perf_evlist__parse_sample(evlist, event, &sample)) {
@@ -307,9 +306,7 @@ static int process_sample_event(struct machine *machine,
 		return -1;
 	}
 
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+	ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
 	thread__put(thread);
 	return ret;
 }
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 1c5c0221cea2..8f6eb853aaf7 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -20,10 +20,10 @@
 
 static int mmap_handler(struct perf_tool *tool __maybe_unused,
 			union perf_event *event,
-			struct perf_sample *sample __maybe_unused,
+			struct perf_sample *sample,
 			struct machine *machine)
 {
-	return machine__process_mmap2_event(machine, event, NULL);
+	return machine__process_mmap2_event(machine, event, sample);
 }
 
 static int init_live_machine(struct machine *machine)
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 071a8b5f5232..f55f4bd47932 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -100,9 +100,11 @@ struct machine *setup_fake_machine(struct machines *machines)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+		struct perf_sample sample = {
+			.cpumode = PERF_RECORD_MISC_USER,
+		};
 		union perf_event fake_mmap_event = {
 			.mmap = {
-				.header = { .misc = PERF_RECORD_MISC_USER, },
 				.pid = fake_mmap_info[i].pid,
 				.tid = fake_mmap_info[i].pid,
 				.start = fake_mmap_info[i].start,
@@ -114,7 +116,7 @@ struct machine *setup_fake_machine(struct machines *machines)
 		strcpy(fake_mmap_event.mmap.filename,
 		       fake_mmap_info[i].filename);
 
-		machine__process_mmap_event(machine, &fake_mmap_event, NULL);
+		machine__process_mmap_event(machine, &fake_mmap_event, &sample);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index ecf136c385d5..ed5aa9eaeb6c 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -81,11 +81,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-		const union perf_event event = {
-			.header = {
-				.misc = PERF_RECORD_MISC_USER,
-			},
-		};
 		struct hist_entry_iter iter = {
 			.evsel = evsel,
 			.sample	= &sample,
@@ -97,13 +92,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 		else
 			iter.ops = &hist_iter_normal;
 
+		sample.cpumode = PERF_RECORD_MISC_USER;
 		sample.pid = fake_samples[i].pid;
 		sample.tid = fake_samples[i].pid;
 		sample.ip = fake_samples[i].ip;
 		sample.callchain = (struct ip_callchain *)fake_callchains[i];
 
-		if (perf_event__preprocess_sample(&event, machine, &al,
-						  &sample) < 0)
+		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 34b945a55d4d..b825d24f8186 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -58,11 +58,6 @@ static int add_hist_entries(struct perf_evlist *evlist,
 	 */
 	evlist__for_each(evlist, evsel) {
 		for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
 			struct hist_entry_iter iter = {
 				.evsel = evsel,
 				.sample = &sample,
@@ -76,12 +71,12 @@ static int add_hist_entries(struct perf_evlist *evlist,
 			hists->dso_filter = NULL;
 			hists->symbol_filter_str = NULL;
 
+			sample.cpumode = PERF_RECORD_MISC_USER;
 			sample.pid = fake_samples[i].pid;
 			sample.tid = fake_samples[i].pid;
 			sample.ip = fake_samples[i].ip;
 
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			al.socket = fake_samples[i].socket;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 64b257d8d557..358324e47805 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -76,17 +76,12 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 		struct hists *hists = evsel__hists(evsel);
 
 		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
-
+			sample.cpumode = PERF_RECORD_MISC_USER;
 			sample.pid = fake_common_samples[k].pid;
 			sample.tid = fake_common_samples[k].pid;
 			sample.ip = fake_common_samples[k].ip;
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(hists, &al, NULL,
@@ -102,17 +97,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 		}
 
 		for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
-
 			sample.pid = fake_samples[i][k].pid;
 			sample.tid = fake_samples[i][k].pid;
 			sample.ip = fake_samples[i][k].ip;
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(hists, &al, NULL,
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 23cce67c7e48..d3556fbe8c5c 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -51,11 +51,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-		const union perf_event event = {
-			.header = {
-				.misc = PERF_RECORD_MISC_USER,
-			},
-		};
 		struct hist_entry_iter iter = {
 			.evsel = evsel,
 			.sample = &sample,
@@ -63,13 +58,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 			.hide_unresolved = false,
 		};
 
+		sample.cpumode = PERF_RECORD_MISC_USER;
 		sample.cpu = fake_samples[i].cpu;
 		sample.pid = fake_samples[i].pid;
 		sample.tid = fake_samples[i].pid;
 		sample.ip = fake_samples[i].ip;
 
-		if (perf_event__preprocess_sample(&event, machine, &al,
-						  &sample) < 0)
+		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index bd9bf7e343b1..2aa45b606fa4 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -55,7 +55,7 @@ static u64 he_get_acc_##_field(struct hist_entry *he)				\
 	return he->stat_acc->_field;						\
 }										\
 										\
-static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
 				       struct perf_hpp *hpp,			\
 				       struct hist_entry *he)			\
 {										\
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eea25e2424e9..da48fd843438 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,4 +1,3 @@
-libperf-y += abspath.o
 libperf-y += alias.o
 libperf-y += annotate.o
 libperf-y += build-id.o
diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c
deleted file mode 100644
index 0e76affe9c36..000000000000
--- a/tools/perf/util/abspath.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "cache.h"
-
-static const char *get_pwd_cwd(void)
-{
-	static char cwd[PATH_MAX + 1];
-	char *pwd;
-	struct stat cwd_stat, pwd_stat;
-	if (getcwd(cwd, PATH_MAX) == NULL)
-		return NULL;
-	pwd = getenv("PWD");
-	if (pwd && strcmp(pwd, cwd)) {
-		stat(cwd, &cwd_stat);
-		if (!stat(pwd, &pwd_stat) &&
-		    pwd_stat.st_dev == cwd_stat.st_dev &&
-		    pwd_stat.st_ino == cwd_stat.st_ino) {
-			strlcpy(cwd, pwd, PATH_MAX);
-		}
-	}
-	return cwd;
-}
-
-const char *make_nonrelative_path(const char *path)
-{
-	static char buf[PATH_MAX + 1];
-
-	if (is_absolute_path(path)) {
-		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	} else {
-		const char *cwd = get_pwd_cwd();
-		if (!cwd)
-			die("Cannot determine the current working directory");
-		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	}
-	return buf;
-}
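
With make_nonrelative_path() removed (its declaration goes away in the cache.h hunk below), the obvious standard-library replacement for this kind of canonicalization is realpath(3), which resolves ".", ".." and symlinks in a single call instead of splicing getcwd()/$PWD output by hand. Minimal sketch of that replacement:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	char buf[PATH_MAX];

	if (argc < 2 || realpath(argv[1], buf) == NULL) {
		perror("realpath");
		return 1;
	}
	printf("%s\n", buf);
	return 0;
}
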
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cea323d9ee7e..9241f8c2b7e1 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
 
 int hist_entry__annotate(struct hist_entry *he, size_t privsize);
 
-int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym);
+int symbol__annotate_init(struct map *map, struct symbol *sym);
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
 			    struct perf_evsel *evsel, bool full_paths,
 			    int min_pcnt, int max_lines, int context);
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e5a8e2d4f2af..57ff31ecb8e4 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -517,7 +517,7 @@ static inline void auxtrace__free(struct perf_session *session)
 
 static inline struct auxtrace_record *
 auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
-		      int *err __maybe_unused)
+		      int *err)
 {
 	*err = 0;
 	return NULL;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index f1479eeef7da..0573c2ec861d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -28,7 +28,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 			   struct machine *machine)
 {
 	struct addr_location al;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -38,7 +37,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 3ca453f0c51f..1f5a93c2c9a2 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -26,14 +26,14 @@
 extern const char *config_exclusive_filename;
 
 typedef int (*config_fn_t)(const char *, const char *, void *);
-extern int perf_default_config(const char *, const char *, void *);
-extern int perf_config(config_fn_t fn, void *);
-extern int perf_config_int(const char *, const char *);
-extern u64 perf_config_u64(const char *, const char *);
-extern int perf_config_bool(const char *, const char *);
-extern int config_error_nonbool(const char *);
-extern const char *perf_config_dirname(const char *, const char *);
-extern const char *perf_etc_perfconfig(void);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(const char *, const char *);
+u64 perf_config_u64(const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_config_dirname(const char *, const char *);
+const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
@@ -64,13 +64,9 @@ static inline int is_absolute_path(const char *path)
 	return path[0] == '/';
 }
 
-const char *make_nonrelative_path(const char *path);
 char *strip_path_suffix(const char *path, const char *suffix);
 
-extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-
-extern char *perf_pathdup(const char *fmt, ...)
-	__attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
 
 #endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 18dd22269764..d2a9e694810c 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -220,7 +220,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
 			bool hide_unresolved);
 
 extern const char record_callchain_help[];
-extern int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record(const char *arg, struct callchain_param *param);
 int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
 int parse_callchain_report_opt(const char *arg);
 int parse_callchain_top_opt(const char *arg);
@@ -236,7 +236,7 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
 }
 
 #ifdef HAVE_SKIP_CALLCHAIN_IDX
-extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
 #else
 static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
 			struct ip_callchain *chain __maybe_unused)
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index b4b8cb42fe5e..31f8dcdbd7ef 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -13,7 +13,7 @@ struct cgroup_sel {
 
 
 extern int nr_cgroups; /* number of explicit cgroups defined */
-extern void close_cgroup(struct cgroup_sel *cgrp);
-extern int parse_cgroups(const struct option *opt, const char *str, int unset);
+void close_cgroup(struct cgroup_sel *cgrp);
+int parse_cgroups(const struct option *opt, const char *str, int unset);
 
 #endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 3bee6773ddb0..d0d465953d36 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h
@@ -5,7 +5,7 @@ unsigned long perf_event_open_cloexec_flag(void);
 
 #ifdef __GLIBC_PREREQ
 #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__)
-extern int sched_getcpu(void) __THROW;
+int sched_getcpu(void) __THROW;
 #endif
 #endif
 
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 811af89ce0bb..bbf69d248ec5 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -632,7 +632,7 @@ static bool is_flush_needed(struct ctf_stream *cs)
 }
 
 static int process_sample_event(struct perf_tool *tool,
-				union perf_event *_event __maybe_unused,
+				union perf_event *_event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
 				struct machine *machine __maybe_unused)
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 1c9689e4cc17..049438d51b9a 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -333,7 +333,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 	    sample_addr_correlates_sym(&evsel->attr)) {
 		struct addr_location addr_al;
 
-		perf_event__preprocess_sample_addr(event, sample, thread, &addr_al);
+		thread__resolve(thread, &addr_al, sample);
 		err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
 				     &es.addr_sym_db_id, &es.addr_offset);
 		if (err)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 45ec4d0a50ed..0953280629cf 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -162,6 +162,7 @@ struct dso {
 	u8		 loaded;
 	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
+	u64		 text_offset;
 	const char	 *short_name;
 	const char	 *long_name;
 	u16		 long_name_len;
@@ -301,7 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  * TODO
 */
 int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso __maybe_unused);
+void dso__data_put_fd(struct dso *dso);
 void dso__data_close(struct dso *dso);
 
 off_t dso__data_size(struct dso *dso, struct machine *machine);
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index a509aa8433a1..577e600c8eb1 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -915,7 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
-		strbuf_addf(buf, "(function_type)");
+		strbuf_add(buf, "(function_type)", 15);
 		return 0;
 	} else {
 		if (!dwarf_diename(&type))
@@ -932,7 +932,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 	}
 	ret = die_get_typename(&type, buf);
 	if (ret == 0)
-		strbuf_addf(buf, "%s", tmp);
+		strbuf_addstr(buf, tmp);
 
 	return ret;
 }
@@ -951,7 +951,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 	ret = die_get_typename(vr_die, buf);
 	if (ret < 0) {
 		pr_debug("Failed to get type, make it unknown.\n");
-		strbuf_addf(buf, "(unknown_type)");
+		strbuf_add(buf, " (unknown_type)", 14);
 	}
 
 	strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
@@ -1013,7 +1013,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
 	}
 
 	if (!first)
-		strbuf_addf(buf, "]>");
+		strbuf_add(buf, "]>", 2);
 
 out:
 	free(scopes);
@@ -1076,7 +1076,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
 	}
 
 	if (!first)
-		strbuf_addf(buf, "]>");
+		strbuf_add(buf, "]>", 2);
 
 	return ret;
 }
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index c42ec366f2a7..dc0ce1adb075 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -25,48 +25,48 @@
 #include <elfutils/version.h>
 
 /* Find the realpath of the target file */
-extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
 
 /* Get DW_AT_comp_dir (should be NULL with older gcc) */
-extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
 
 /* Get a line number and file name for given address */
-extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
-			    const char **fname, int *lineno);
+int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+		     const char **fname, int *lineno);
 
 /* Walk on funcitons at given address */
-extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
-			int (*callback)(Dwarf_Die *, void *), void *data);
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			 int (*callback)(Dwarf_Die *, void *), void *data);
 
 /* Ensure that this DIE is a subprogram and definition (not declaration) */
-extern bool die_is_func_def(Dwarf_Die *dw_die);
+bool die_is_func_def(Dwarf_Die *dw_die);
 
 /* Ensure that this DIE is an instance of a subprogram */
-extern bool die_is_func_instance(Dwarf_Die *dw_die);
+bool die_is_func_instance(Dwarf_Die *dw_die);
 
 /* Compare diename and tname */
-extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
 /* Matching diename with glob pattern */
-extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
 
 /* Get callsite line number of inline-function instance */
-extern int die_get_call_lineno(Dwarf_Die *in_die);
+int die_get_call_lineno(Dwarf_Die *in_die);
 
 /* Get callsite file name of inlined function instance */
-extern const char *die_get_call_file(Dwarf_Die *in_die);
+const char *die_get_call_file(Dwarf_Die *in_die);
 
 /* Get type die */
-extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
 
 /* Get a type die, but skip qualifiers and typedef */
-extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
 
 /* Check whether the DIE is signed or not */
-extern bool die_is_signed_type(Dwarf_Die *tp_die);
+bool die_is_signed_type(Dwarf_Die *tp_die);
 
 /* Get data_member_location offset */
-extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
 
 /* Return values for die_find_child() callbacks */
 enum {
@@ -77,29 +77,29 @@ enum {
 };
 
 /* Search child DIEs */
-extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
-				 int (*callback)(Dwarf_Die *, void *),
-				 void *data, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			 int (*callback)(Dwarf_Die *, void *),
+			 void *data, Dwarf_Die *die_mem);
 
 /* Search a non-inlined function including given address */
-extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
-				    Dwarf_Die *die_mem);
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem);
 
 /* Search a non-inlined function with tail call at given address */
 Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
 				    Dwarf_Die *die_mem);
 
 /* Search the top inlined function including given address */
-extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
-					  Dwarf_Die *die_mem);
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem);
 
 /* Search the deepest inlined function including given address */
-extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
-				      Dwarf_Die *die_mem);
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem);
 
 /* Walk on the instances of given DIE */
-extern int die_walk_instances(Dwarf_Die *in_die,
-			      int (*callback)(Dwarf_Die *, void *), void *data);
+int die_walk_instances(Dwarf_Die *in_die,
+		       int (*callback)(Dwarf_Die *, void *), void *data);
 
 /* Walker on lines (Note: line number will not be sorted) */
 typedef int (* line_walk_callback_t) (const char *fname, int lineno,
@@ -109,22 +109,20 @@ typedef int (* line_walk_callback_t) (const char *fname, int lineno,
  * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
  * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
  */
-extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback,
-			  void *data);
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
 
 /* Find a variable called 'name' at given address */
-extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
-				       Dwarf_Addr addr, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem);
 
 /* Find a member called 'name' */
-extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
-				  Dwarf_Die *die_mem);
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem);
 
 /* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
-extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
-			struct strbuf *buf);
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
 #endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 7bad5c3fa7b7..52cf479bc593 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1295,12 +1295,9 @@ void thread__find_addr_location(struct thread *thread,
  * Callers need to drop the reference to al->thread, obtained in
  * machine__findnew_thread()
  */
-int perf_event__preprocess_sample(const union perf_event *event,
-				  struct machine *machine,
-				  struct addr_location *al,
-				  struct perf_sample *sample)
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -1315,11 +1312,11 @@ int perf_event__preprocess_sample(const union perf_event *event,
 	 * events, but for older perf.data files there was no such thing, so do
 	 * it now.
 	 */
-	if (cpumode == PERF_RECORD_MISC_KERNEL &&
+	if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
 	    machine__kernel_map(machine) == NULL)
 		machine__create_kernel_maps(machine);
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1395,16 +1392,12 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
 	return false;
 }
 
-void perf_event__preprocess_sample_addr(union perf_event *event,
-					struct perf_sample *sample,
-					struct thread *thread,
-					struct addr_location *al)
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
 	if (!al->map)
-		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
+		thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
 				      sample->addr, al);
 
 	al->cpu = sample->cpu;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b7ffb7ee9971..6bb1c928350d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -192,6 +192,7 @@ struct perf_sample {
 	u64 data_src;
 	u32 flags;
 	u16 insn_len;
+	u8  cpumode;
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
@@ -597,10 +598,8 @@ int perf_event__process(struct perf_tool *tool,
 
 struct addr_location;
 
-int perf_event__preprocess_sample(const union perf_event *event,
-				  struct machine *machine,
-				  struct addr_location *al,
-				  struct perf_sample *sample);
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample);
 
 void addr_location__put(struct addr_location *al);
 
@@ -608,10 +607,8 @@ struct thread;
 
 bool is_bts_event(struct perf_event_attr *attr);
 bool sample_addr_correlates_sym(struct perf_event_attr *attr);
-void perf_event__preprocess_sample_addr(union perf_event *event,
-					struct perf_sample *sample,
-					struct thread *thread,
-					struct addr_location *al);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample);
 
 const char *perf_event__name(unsigned int id);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0902fe418754..738ce226002b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1643,6 +1643,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	data->stream_id = data->id = data->time = -1ULL;
 	data->period = evsel->attr.sample_period;
 	data->weight = 0;
+	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	if (event->header.type != PERF_RECORD_SAMPLE) {
 		if (!evsel->attr.sample_id_all)
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 45bf9c6d3257..cd67e64a0494 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -2,12 +2,10 @@
 #define __GENELF_H__
 
 /* genelf.c */
-extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
-			 const void *code, int csize,
-			 void *debug, int nr_debug_entries);
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+		  const void *code, int csize, void *debug, int nr_debug_entries);
 /* genelf_debug.c */
-extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
-			      void *debug, int nr_debug_entries);
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
 
 #if   defined(__arm__)
 #define GEN_ELF_ARCH	EM_ARM
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 73e38e472ecd..90680ec9f8b8 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1872,11 +1872,6 @@ static int process_cpu_topology(struct perf_file_section *section,
 		if (ph->needs_swap)
 			nr = bswap_32(nr);
 
-		if (nr > (u32)cpu_nr) {
-			pr_debug("core_id number is too big."
-				 "You may need to upgrade the perf tool.\n");
-			goto free_cpu;
-		}
 		ph->env.cpu[i].core_id = nr;
 
 		ret = readn(fd, &nr, sizeof(nr));
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3d87ca823c0a..d306ca118449 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -121,7 +121,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
 					     perf_event__handler_t process);
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
 			     struct perf_evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct perf_evlist **pevlist);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 290b3cbf6877..31c4641fe5ff 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -670,7 +670,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al
 }
 
 static int
-iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
 			     struct addr_location *al __maybe_unused)
 {
 	/* to avoid calling callback function */
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ead18c82294f..bec0cd660fbd 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -433,8 +433,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 			  struct perf_sample *sample, bool nonany_branch_mode);
 
 struct option;
-int parse_filter_percentage(const struct option *opt __maybe_unused,
-			    const char *arg, int unset __maybe_unused);
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
 int perf_hist_config(const char *var, const char *value);
 
 void perf_hpp_list__init(struct perf_hpp_list *list);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index eb0e7f8bf515..6bc3ecd2e7ca 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -678,7 +678,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session,
 	return 0;
 }
 
-static int intel_bts_flush(struct perf_session *session __maybe_unused,
+static int intel_bts_flush(struct perf_session *session,
 			   struct perf_tool *tool __maybe_unused)
 {
 	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index a1e99da0715a..3f42ee4d2a0b 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -3,13 +3,9 @@
 
 #include <data.h>
 
-extern int jit_process(struct perf_session *session,
-		       struct perf_data_file *output,
-		       struct machine *machine,
-		       char *filename,
-		       pid_t pid,
-		       u64 *nbytes);
-
-extern int jit_inject_record(const char *filename);
+int jit_process(struct perf_session *session, struct perf_data_file *output,
+		struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
 
 #endif /* __JIT_H__ */
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 00724d496d38..33071d6159bc 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -3,11 +3,11 @@
  * Copyright (C) 2015, Huawei Inc.
  */
 
+#include <limits.h>
 #include <stdio.h>
-#include "util.h"
+#include <stdlib.h>
 #include "debug.h"
 #include "llvm-utils.h"
-#include "cache.h"
 
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
 		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
@@ -98,11 +98,12 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
 	void *buf = NULL;
 	FILE *file = NULL;
 	size_t read_sz = 0, buf_sz = 0;
+	char serr[STRERR_BUFSIZE];
 
 	file = popen(cmd, "r");
 	if (!file) {
 		pr_err("ERROR: unable to popen cmd: %s\n",
-		       strerror(errno));
+		       strerror_r(errno, serr, sizeof(serr)));
 		return -EINVAL;
 	}
 
@@ -136,7 +137,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
 
 	if (ferror(file)) {
 		pr_err("ERROR: error occurred when reading from pipe: %s\n",
-		       strerror(errno));
+		       strerror_r(errno, serr, sizeof(serr)));
 		err = -EIO;
 		goto errout;
 	}
@@ -334,10 +335,18 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	unsigned int kernel_version;
 	char linux_version_code_str[64];
 	const char *clang_opt = llvm_param.clang_opt;
-	char clang_path[PATH_MAX], nr_cpus_avail_str[64];
+	char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+	char serr[STRERR_BUFSIZE];
 	char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
 	const char *template = llvm_param.clang_bpf_cmd_template;
 
+	if (path[0] != '-' && realpath(path, abspath) == NULL) {
+		err = errno;
+		pr_err("ERROR: problems with path %s: %s\n",
+		       path, strerror_r(err, serr, sizeof(serr)));
+		return -err;
+	}
+
 	if (!template)
 		template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
 
@@ -362,7 +371,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	if (nr_cpus_avail <= 0) {
 		pr_err(
 "WARNING:\tunable to get available CPUs in this system: %s\n"
-"        \tUse 128 instead.\n", strerror(errno));
+"        \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr)));
 		nr_cpus_avail = 128;
 	}
 	snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
@@ -387,8 +396,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	 * stdin to be source file (testing).
 	 */
 	force_set_env("CLANG_SOURCE",
-		      (path[0] == '-') ? path :
-		      make_nonrelative_path(path));
+		      (path[0] == '-') ? path : abspath);
 
 	pr_debug("llvm compiling command template: %s\n", template);
 	err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
index 5b3cf1c229e2..23b9a743fe72 100644
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h
@@ -39,11 +39,10 @@ struct llvm_param {
 };
 
 extern struct llvm_param llvm_param;
-extern int perf_llvm_config(const char *var, const char *value);
+int perf_llvm_config(const char *var, const char *value);
 
-extern int llvm__compile_bpf(const char *path, void **p_obj_buf,
-			     size_t *p_obj_buf_sz);
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
 
 /* This function is for test__llvm() use only */
-extern int llvm__search_clang(void);
+int llvm__search_clang(void);
 #endif
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ad79297c76c8..80b9b6a87990 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1301,9 +1301,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 
 int machine__process_mmap2_event(struct machine *machine,
 				 union perf_event *event,
-				 struct perf_sample *sample __maybe_unused)
+				 struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
 	struct map *map;
 	enum map_type type;
@@ -1312,8 +1311,8 @@ int machine__process_mmap2_event(struct machine *machine,
 	if (dump_trace)
 		perf_event__fprintf_mmap2(event, stdout);
 
-	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
-	    cpumode == PERF_RECORD_MISC_KERNEL) {
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
 		ret = machine__process_kernel_mmap_event(machine, event);
 		if (ret < 0)
 			goto out_problem;
@@ -1355,9 +1354,8 @@ int machine__process_mmap2_event(struct machine *machine,
 }
 
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
-				struct perf_sample *sample __maybe_unused)
+				struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
 	struct map *map;
 	enum map_type type;
@@ -1366,8 +1364,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 	if (dump_trace)
 		perf_event__fprintf_mmap(event, stdout);
 
-	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
-	    cpumode == PERF_RECORD_MISC_KERNEL) {
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
 		ret = machine__process_kernel_mmap_event(machine, event);
 		if (ret < 0)
 			goto out_problem;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 1a3e45baf97f..8499db281158 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -94,7 +94,7 @@ int machine__process_aux_event(struct machine *machine,
 			       union perf_event *event);
 int machine__process_itrace_start_event(struct machine *machine,
 					union perf_event *event);
-int machine__process_switch_event(struct machine *machine __maybe_unused,
+int machine__process_switch_event(struct machine *machine,
 				  union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 67e493088e81..d740c3ca9a1d 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -22,19 +22,18 @@ struct tracepoint_path {
 	struct tracepoint_path *next;
 };
 
-extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern struct tracepoint_path *tracepoint_name_to_path(const char *name);
-extern bool have_tracepoints(struct list_head *evlist);
+struct tracepoint_path *tracepoint_id_to_path(u64 config);
+struct tracepoint_path *tracepoint_name_to_path(const char *name);
+bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
 
-extern int parse_events_option(const struct option *opt, const char *str,
-			       int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str,
-			struct parse_events_error *error);
-extern int parse_events_terms(struct list_head *terms, const char *str);
-extern int parse_filter(const struct option *opt, const char *str, int unset);
-extern int exclude_perf(const struct option *opt, const char *arg, int unset);
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 struct parse_events_error *error);
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
 
@@ -183,7 +182,7 @@ void print_symbol_events(const char *event_glob, unsigned type,
 void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
 			     bool name_only);
 int print_hwcache_events(const char *event_glob, bool name_only);
-extern int is_valid_tracepoint(const char *event_string);
+int is_valid_tracepoint(const char *event_string);
 
 int valid_event_mount(const char *eventfs);
 char *parse_events_formats_error_string(char *additional_terms);
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 3654d964e49d..3bf6bf82ff2d 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -41,36 +41,6 @@ static char *cleanup_path(char *path)
 	return path;
 }
 
-static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
-{
-	const char *perf_dir = get_perf_dir();
-	size_t len;
-
-	len = strlen(perf_dir);
-	if (n < len + 1)
-		goto bad;
-	memcpy(buf, perf_dir, len);
-	if (len && !is_dir_sep(perf_dir[len-1]))
-		buf[len++] = '/';
-	len += vsnprintf(buf + len, n - len, fmt, args);
-	if (len >= n)
-		goto bad;
-	return cleanup_path(buf);
-bad:
-	strlcpy(buf, bad_path, n);
-	return buf;
-}
-
-char *perf_pathdup(const char *fmt, ...)
-{
-	char path[PATH_MAX];
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(path, sizeof(path), fmt, args);
-	va_end(args);
-	return xstrdup(path);
-}
-
 char *mkpath(const char *fmt, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 93996ec4bbe3..8319fbb08636 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2179,7 +2179,7 @@ static int perf_probe_event__sprintf(const char *group, const char *event,
 		strbuf_addf(result, " in %s", module);
 
 	if (pev->nargs > 0) {
-		strbuf_addstr(result, " with");
+		strbuf_add(result, " with", 5);
 		for (i = 0; i < pev->nargs; i++) {
 			ret = synthesize_perf_probe_arg(&pev->args[i],
 							buf, 128);
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index ba926c30f8cd..e54e7b011577 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -114,49 +114,44 @@ int init_probe_symbol_maps(bool user_only);
 void exit_probe_symbol_maps(void);
 
 /* Command string to events */
-extern int parse_perf_probe_command(const char *cmd,
-				    struct perf_probe_event *pev);
-extern int parse_probe_trace_command(const char *cmd,
-				     struct probe_trace_event *tev);
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
 
 /* Events to command string */
-extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
-extern char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf,
-				     size_t len);
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
 
 /* Check the perf_probe_event needs debuginfo */
-extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
 
 /* Release event contents */
-extern void clear_perf_probe_event(struct perf_probe_event *pev);
-extern void clear_probe_trace_event(struct probe_trace_event *tev);
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
 
 /* Command string to line-range */
-extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
 
 /* Release line range members */
-extern void line_range__clear(struct line_range *lr);
+void line_range__clear(struct line_range *lr);
 
 /* Initialize line range */
-extern int line_range__init(struct line_range *lr);
-
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int del_perf_probe_events(struct strfilter *filter);
-
-extern int show_perf_probe_event(const char *group, const char *event,
-				 struct perf_probe_event *pev,
-				 const char *module, bool use_stdout);
-extern int show_perf_probe_events(struct strfilter *filter);
-extern int show_line_range(struct line_range *lr, const char *module,
-			   bool user);
-extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			       struct strfilter *filter);
-extern int show_available_funcs(const char *module, struct strfilter *filter,
-				bool user);
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *filter);
+int show_available_funcs(const char *module, struct strfilter *filter, bool user);
 bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct probe_trace_event *tev, struct map *map);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4ce5c5e18f48..b3bd0fba0237 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1314,18 +1314,18 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 			if (probe_conf.show_location_range) {
 				if (!externs) {
 					if (ret)
-						strbuf_addf(&buf, "[INV]\t");
+						strbuf_add(&buf, "[INV]\t", 6);
 					else
-						strbuf_addf(&buf, "[VAL]\t");
+						strbuf_add(&buf, "[VAL]\t", 6);
 				} else
-					strbuf_addf(&buf, "[EXT]\t");
+					strbuf_add(&buf, "[EXT]\t", 6);
 			}
 
 			ret2 = die_get_varname(die_mem, &buf);
 
 			if (!ret2 && probe_conf.show_location_range &&
 				!externs) {
-				strbuf_addf(&buf, "\t");
+				strbuf_addch(&buf, '\t');
 				ret2 = die_get_var_range(&af->pf.sp_die,
 							die_mem, &buf);
 			}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 0aec7704e395..51137fccb9c8 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,27 +34,25 @@ struct debuginfo {
 };
 
 /* This also tries to open distro debuginfo */
-extern struct debuginfo *debuginfo__new(const char *path);
-extern void debuginfo__delete(struct debuginfo *dbg);
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-extern int debuginfo__find_trace_events(struct debuginfo *dbg,
-					struct perf_probe_event *pev,
-					struct probe_trace_event **tevs);
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs);
 
 /* Find a perf_probe_point from debuginfo */
-extern int debuginfo__find_probe_point(struct debuginfo *dbg,
-				       unsigned long addr,
-				       struct perf_probe_point *ppt);
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+				struct perf_probe_point *ppt);
 
 /* Find a line range */
-extern int debuginfo__find_line_range(struct debuginfo *dbg,
-				      struct line_range *lr);
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
 
 /* Find available variables */
-extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
-					     struct perf_probe_event *pev,
-					     struct variable_list **vls);
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls);
 
 /* Find a src file from a DWARF tag path */
 int get_real_path(const char *raw_path, const char *comp_dir,
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index 172889ea234f..3340c9c4a6ca 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -24,6 +24,6 @@
  * sq_quote() in a real application.
  */
 
-extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
 
 #endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 60b3593d210d..4abd85c6346d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1107,12 +1107,11 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 					       union perf_event *event,
 					       struct perf_sample *sample)
 {
-	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct machine *machine;
 
 	if (perf_guest &&
-	    ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
-	     (cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
 		u32 pid;
 
 		if (event->header.type == PERF_RECORD_MMAP
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 93fa136b0025..47966a1618c7 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2225,7 +2225,7 @@ int hpp_dimension__add_output(unsigned col)
 }
 
 static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
-			       struct perf_evlist *evlist __maybe_unused,
+			       struct perf_evlist *evlist,
 			       int level)
 {
 	unsigned int i;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index b33ffb2af2cf..fdb71961143e 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -152,8 +152,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
 }
 
 static void print_stalled_cycles_frontend(int cpu,
-					  struct perf_evsel *evsel
-					  __maybe_unused, double avg,
+					  struct perf_evsel *evsel, double avg,
 					  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
@@ -175,8 +174,7 @@ static void print_stalled_cycles_frontend(int cpu,
 }
 
 static void print_stalled_cycles_backend(int cpu,
-					 struct perf_evsel *evsel
-					 __maybe_unused, double avg,
+					 struct perf_evsel *evsel, double avg,
 					 struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
@@ -194,7 +192,7 @@ static void print_stalled_cycles_backend(int cpu,
 }
 
 static void print_branch_misses(int cpu,
-				struct perf_evsel *evsel __maybe_unused,
+				struct perf_evsel *evsel,
 				double avg,
 				struct perf_stat_output_ctx *out)
 {
@@ -213,7 +211,7 @@ static void print_branch_misses(int cpu,
 }
 
 static void print_l1_dcache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
+				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out)
 {
@@ -232,7 +230,7 @@ static void print_l1_dcache_misses(int cpu,
 }
 
 static void print_l1_icache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
+				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out)
 {
@@ -250,7 +248,7 @@ static void print_l1_icache_misses(int cpu,
 }
 
 static void print_dtlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
+				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out)
 {
@@ -268,7 +266,7 @@ static void print_dtlb_cache_misses(int cpu,
 }
 
 static void print_itlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
+				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out)
 {
@@ -286,7 +284,7 @@ static void print_itlb_cache_misses(int cpu,
 }
 
 static void print_ll_cache_misses(int cpu,
-				  struct perf_evsel *evsel __maybe_unused,
+				  struct perf_evsel *evsel,
 				  double avg,
 				  struct perf_stat_output_ctx *out)
 {
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index d3d279275432..8fb73295ec34 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -51,6 +51,13 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
+void strbuf_addch(struct strbuf *sb, int c)
+{
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
 	strbuf_grow(sb, len);
@@ -58,7 +65,7 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 {
 	int len;
 	va_list ap_saved;
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 7a32c838884d..ab9be0fbbd40 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -51,16 +51,16 @@ struct strbuf {
 #define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
 
 /*----- strbuf life cycle -----*/
-extern void strbuf_init(struct strbuf *buf, ssize_t hint);
-extern void strbuf_release(struct strbuf *);
-extern char *strbuf_detach(struct strbuf *, size_t *);
+void strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
 
 /*----- strbuf size related -----*/
 static inline ssize_t strbuf_avail(const struct strbuf *sb) {
 	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
 }
 
-extern void strbuf_grow(struct strbuf *, size_t);
+void strbuf_grow(struct strbuf *buf, size_t);
 
 static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
 	if (!sb->alloc)
@@ -71,22 +71,17 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
 }
 
 /*----- add data in your buffer -----*/
-static inline void strbuf_addch(struct strbuf *sb, int c) {
-	strbuf_grow(sb, 1);
-	sb->buf[sb->len++] = c;
-	sb->buf[sb->len] = '\0';
-}
+void strbuf_addch(struct strbuf *sb, int c);
 
-extern void strbuf_add(struct strbuf *, const void *, size_t);
+void strbuf_add(struct strbuf *buf, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));
 }
 
 __attribute__((format(printf,2,3)))
-extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap);
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 
 /* XXX: if read fails, any partial read is undone */
-extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
 
 #endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index 9292a5291445..946fdf2db97c 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h
@@ -3,32 +3,31 @@
 
 #include <linux/types.h>
 
-extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
-extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_box(int Yslot, u64 start, u64 end, const char *type);
-extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
-
-
-extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
-extern void svg_cstate(int cpu, u64 start, u64 end, int type);
-extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
-
-
-extern void svg_time_grid(double min_thickness);
-extern void svg_io_legenda(void);
-extern void svg_legenda(void);
-extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
-extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
-extern void svg_interrupt(u64 start, int row, const char *backtrace);
-extern void svg_text(int Yslot, u64 start, const char *text);
-extern void svg_close(void);
-extern int svg_build_topology_map(char *sib_core, int sib_core_nr,
-				  char *sib_thr, int sib_thr_nr);
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr);
 
 extern int svg_page_width;
 extern u64 svg_highlight;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index b1dd68f358fc..bc229a74c6a9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -793,6 +793,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	uint32_t idx;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr;
+	GElf_Shdr tshdr;
 	Elf_Data *syms, *opddata = NULL;
 	GElf_Sym sym;
 	Elf_Scn *sec, *sec_strndx;
@@ -832,6 +833,9 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	sec = syms_ss->symtab;
 	shdr = syms_ss->symshdr;
 
+	if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
 	if (runtime_ss->opdsec)
 		opddata = elf_rawdata(runtime_ss->opdsec, NULL);
 
@@ -880,12 +884,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	 * Handle any relocation of vdso necessary because older kernels
 	 * attempted to prelink vdso to its virtual address.
 	 */
-	if (dso__is_vdso(dso)) {
-		GElf_Shdr tshdr;
-
-		if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
-			map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
-	}
+	if (dso__is_vdso(dso))
+		map->reloc = map->start - dso->text_offset;
 
 	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
 	/*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index a937053a0ae0..c8b7544d9267 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -34,8 +34,8 @@
 #endif
 
 #ifdef HAVE_LIBELF_SUPPORT
-extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
-				GElf_Shdr *shp, const char *name, size_t *idx);
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx);
 #endif
 
 #ifndef DMGL_PARAMS
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 6adfa18cdd4e..996046a66fe5 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -41,15 +41,9 @@ static void warn_builtin(const char *warn, va_list params)
 /* If we are in a dlopen()ed .so write to a global variable would segfault
  * (ugh), so keep things static. */
 static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
 static void (*error_routine)(const char *err, va_list params) = error_builtin;
 static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
 
-void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
-{
-	die_routine = routine;
-}
-
 void set_warning_routine(void (*routine)(const char *err, va_list params))
 {
 	warn_routine = routine;
@@ -65,7 +59,7 @@ void die(const char *err, ...)
 	va_list params;
 
 	va_start(params, err);
-	die_routine(err, params);
+	die_builtin(err, params);
 	va_end(params);
 }
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index d0d50cef8b2a..8298d607c738 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -133,25 +133,15 @@ extern char buildid_dir[];
 #define PERF_GTK_DSO  "libperf-gtk.so"
 
 /* General helper functions */
-extern void usage(const char *err) NORETURN;
-extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void usage(const char *err) NORETURN;
+void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
 
-#include "../../../include/linux/stringify.h"
+void set_warning_routine(void (*routine)(const char *err, va_list params));
 
-#define DIE_IF(cnd)	\
-	do { if (cnd)	\
-		die(" at (" __FILE__ ":" __stringify(__LINE__) "): "	\
-		    __stringify(cnd) "\n");				\
-	} while (0)
-
-
-extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
-extern void set_warning_routine(void (*routine)(const char *err, va_list params));
-
-extern int prefixcmp(const char *str, const char *prefix);
-extern void set_buildid_dir(const char *dir);
+int prefixcmp(const char *str, const char *prefix);
+void set_buildid_dir(const char *dir);
 
 #ifdef __GLIBC_PREREQ
 #if __GLIBC_PREREQ(2, 1)
@@ -172,8 +162,7 @@ static inline char *gitstrchrnul(const char *s, int c)
 /*
  * Wrappers:
  */
-extern char *xstrdup(const char *str);
-extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
+void *xrealloc(void *ptr, size_t size) __attribute__((weak));
 
 
 static inline void *zalloc(size_t size)
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 19f15b650703..5f1a07c4b87b 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c
@@ -12,18 +12,6 @@ static inline void release_pack_memory(size_t size __maybe_unused,
 {
 }
 
-char *xstrdup(const char *str)
-{
-	char *ret = strdup(str);
-	if (!ret) {
-		release_pack_memory(strlen(str) + 1, -1);
-		ret = strdup(str);
-		if (!ret)
-			die("Out of memory, strdup failed");
-	}
-	return ret;
-}
-
 void *xrealloc(void *ptr, size_t size)
 {
 	void *ret = realloc(ptr, size);
diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak
similarity index 100%
rename from tools/perf/config/utilities.mak
rename to tools/scripts/utilities.mak
