[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <159980774689.20229.8560566905019815809.tip-bot2@tip-bot2>
Date: Fri, 11 Sep 2020 07:02:26 -0000
From: "tip-bot2 for Kim Phillips" <tip-bot2@...utronix.de>
To: linux-tip-commits@...r.kernel.org
Cc: Kim Phillips <kim.phillips@....com>,
"Peter Zijlstra (Intel)" <peterz@...radead.org>,
x86 <x86@...nel.org>, LKML <linux-kernel@...r.kernel.org>
Subject: [tip: perf/core] perf/x86/amd/ibs: Support 27-bit extended Op/cycle counter
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 8b0bed7d410f48499d72af2e2bcd890daad94e0d
Gitweb: https://git.kernel.org/tip/8b0bed7d410f48499d72af2e2bcd890daad94e0d
Author: Kim Phillips <kim.phillips@....com>
AuthorDate: Tue, 08 Sep 2020 16:47:39 -05:00
Committer: Peter Zijlstra <peterz@...radead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:36 +02:00
perf/x86/amd/ibs: Support 27-bit extended Op/cycle counter
IBS hardware with the OpCntExt feature gets a 7-bit wider internal
counter. Both the maximum and current count bitfields in the
IBS_OP_CTL register are extended to support reading and writing it.
No changes are necessary to the driver for handling the extra
contiguous current count bits (IbsOpCurCnt), as the driver already
passes through 32 bits of that field. However, the driver has to do
some extra bit manipulation when converting from a period to the
non-contiguous (although conveniently aligned) extra bits in the
IbsOpMaxCnt bitfield.
This decreases IBS Op interrupt overhead when the period is over
1,048,560 (0xffff0), which would previously activate the driver's
software counter. That threshold is now 134,217,712 (0x7fffff0).
Signed-off-by: Kim Phillips <kim.phillips@....com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Link: https://lkml.kernel.org/r/20200908214740.18097-7-kim.phillips@amd.com
---
arch/x86/events/amd/ibs.c | 42 ++++++++++++++++++++++--------
arch/x86/include/asm/perf_event.h | 1 +-
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index cfbd020..ea323dc 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -339,10 +339,13 @@ static u64 get_ibs_op_count(u64 config)
* and the lower 7 bits of CurCnt are randomized.
* Otherwise CurCnt has the full 27-bit current counter value.
*/
- if (config & IBS_OP_VAL)
+ if (config & IBS_OP_VAL) {
count = (config & IBS_OP_MAX_CNT) << 4;
- else if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ count += config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
count = (config & IBS_OP_CUR_CNT) >> 32;
+ }
return count;
}
@@ -398,7 +401,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- u64 period;
+ u64 period, config = 0;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
@@ -407,13 +410,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period);
+ if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
+ config |= period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ config |= period >> 4;
+
/*
* Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it.
*/
set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state);
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+ perf_ibs_enable_event(perf_ibs, hwc, config);
perf_event_update_userpage(event);
}
@@ -581,7 +590,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_ibs_data ibs_data;
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
- u64 *buf, *config, period;
+ u64 *buf, *config, period, new_config = 0;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -676,13 +685,17 @@ out:
if (throttle) {
perf_ibs_stop(event, 0);
} else {
- period >>= 4;
-
- if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
- (*config & IBS_OP_CNT_CTL))
- period |= *config & IBS_OP_CUR_CNT_RAND;
+ if (perf_ibs == &perf_ibs_op) {
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
+ new_config |= *config & IBS_OP_CUR_CNT_RAND;
+ }
+ new_config |= period >> 4;
- perf_ibs_enable_event(perf_ibs, hwc, period);
+ perf_ibs_enable_event(perf_ibs, hwc, new_config);
}
perf_event_update_userpage(event);
@@ -749,6 +762,13 @@ static __init void perf_event_ibs_init(void)
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr;
}
+
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ }
+
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e20aa58..6960cd6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -405,6 +405,7 @@ struct pebs_xmm {
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
#define IBS_RIP_INVALID (1ULL<<38)
#ifdef CONFIG_X86_LOCAL_APIC
Powered by blists - more mailing lists