Message-ID: <20230911174652.191605-1-wyes.karny@amd.com>
Date:   Mon, 11 Sep 2023 17:46:52 +0000
From:   Wyes Karny <wyes.karny@....com>
To:     <peterz@...radead.org>, <mingo@...hat.com>, <acme@...nel.org>
CC:     <mark.rutland@....com>, <alexander.shishkin@...ux.intel.com>,
        <jolsa@...nel.org>, <namhyung@...nel.org>, <irogers@...gle.com>,
        <adrian.hunter@...el.com>, <tglx@...utronix.de>, <bp@...en8.de>,
        <dave.hansen@...ux.intel.com>, <x86@...nel.org>, <hpa@...or.com>,
        <linux-perf-users@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <gautham.shenoy@....com>, <ravi.bangoria@....com>,
        <Sandipan.Das@....com>, <eranian@...gle.com>,
        <kan.liang@...ux.intel.com>, Wyes Karny <wyes.karny@....com>
Subject: [RFC] amd_uncore/rapl: Add per-core energy tracking support for AMD

Introduce a distinct energy monitoring event in the amd_uncore driver to
expose per-core energy consumption readings. Unlike the existing
energy-cores event in the power PMU, which aggregates energy data per
socket/package [1], this new event maps to AMD's per-core RAPL counters.
This addresses the need for separate power consumption measurements for
individual cores.
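
For context, one RAPL counter increment corresponds to 2^-ESU Joules, where
ESU is the energy status unit read from MSR_AMD_RAPL_POWER_UNIT (bits 12:8).
The driver pre-shifts the counter delta by (32 - ESU) and advertises a fixed
sysfs scale of 2^-32 (2.3283064365386962890625e-10), so perf arrives at the
same Joules value. Below is a minimal userspace sketch of that arithmetic,
illustrative only and not part of this patch; the helper names and the ESU
value of 16 are assumptions:

  /* Illustrative only: maps a raw MSR_AMD_CORE_ENERGY_STATUS delta to Joules. */
  #include <stdint.h>
  #include <stdio.h>

  /* Direct conversion: each counter tick is 2^-esu Joules. */
  static double raw_delta_to_joules(uint64_t delta, unsigned int esu)
  {
          return (double)delta / (double)(1ULL << esu);
  }

  /* What the driver reports: (delta << (32 - esu)), scaled by 2^-32 in sysfs. */
  static double perf_scaled_count(uint64_t delta, unsigned int esu)
  {
          return (double)(delta << (32 - esu)) * 0x1p-32;
  }

  int main(void)
  {
          uint64_t delta = 0x12345;   /* hypothetical counter delta */
          unsigned int esu = 16;      /* assumed energy status unit */

          printf("%f J vs %f J\n",
                 raw_delta_to_joules(delta, esu),
                 perf_scaled_count(delta, esu));
          return 0;
  }

Both helpers print the same value for any ESU in the 1-31 range, which is why
the pre-shift plus fixed 2^-32 scale is equivalent to dividing by 2^ESU.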

Running a workload pinned to CPU 1 gives the below output with perf stat:

$ sudo perf stat --per-core -C 0-7 -I 1000 -e amd_rapl/energy-cores/

           time core               cpus             counts   unit events
    17.176191314 S0-D0-C0              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C1              1               4.35 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C2              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C3              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C4              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C5              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C6              1               0.02 Joules amd_rapl/energy-cores/
    17.176191314 S0-D0-C7              1               0.02 Joules amd_rapl/energy-cores/

[1]: https://lore.kernel.org/lkml/3e766f0e-37d4-0f82-3868-31b14228868d@linux.intel.com/

Signed-off-by: Wyes Karny <wyes.karny@....com>
---
 arch/x86/events/amd/uncore.c | 213 +++++++++++++++++++++++++++++++++--
 1 file changed, 202 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 83f15fe411b3..e03b12e20ebd 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -26,6 +26,7 @@
 #define RDPMC_BASE_LLC		10
 
 #define COUNTER_SHIFT		16
+#define RAPL_CNTR_WIDTH	32
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"amd_uncore: " fmt
@@ -35,6 +36,8 @@ static int num_counters_llc;
 static int num_counters_nb;
 static bool l3_mask;
 
+static int rapl_hw_unit __read_mostly;
+
 static HLIST_HEAD(uncore_unused_list);
 
 struct amd_uncore {
@@ -52,12 +55,15 @@ struct amd_uncore {
 
 static struct amd_uncore * __percpu *amd_uncore_nb;
 static struct amd_uncore * __percpu *amd_uncore_llc;
+static struct amd_uncore * __percpu *amd_uncore_rapl;
 
 static struct pmu amd_nb_pmu;
 static struct pmu amd_llc_pmu;
+static struct pmu amd_rapl_pmu;
 
 static cpumask_t amd_nb_active_mask;
 static cpumask_t amd_llc_active_mask;
+static cpumask_t amd_rapl_active_mask;
 
 static bool is_nb_event(struct perf_event *event)
 {
@@ -69,44 +75,78 @@ static bool is_llc_event(struct perf_event *event)
 	return event->pmu->type == amd_llc_pmu.type;
 }
 
+static bool is_rapl_event(struct perf_event *event)
+{
+	return event->pmu->type == amd_rapl_pmu.type;
+}
+
 static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
 {
 	if (is_nb_event(event) && amd_uncore_nb)
 		return *per_cpu_ptr(amd_uncore_nb, event->cpu);
 	else if (is_llc_event(event) && amd_uncore_llc)
 		return *per_cpu_ptr(amd_uncore_llc, event->cpu);
+	else if (is_rapl_event(event) && amd_uncore_rapl)
+		return *per_cpu_ptr(amd_uncore_rapl, event->cpu);
 
 	return NULL;
 }
 
+static inline u64 rapl_scale(u64 v)
+{
+	return v << (32 - rapl_hw_unit);
+}
+
 static void amd_uncore_read(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 prev, new;
 	s64 delta;
+	int shift;
 
 	/*
 	 * since we do not enable counter overflow interrupts,
 	 * we do not have to worry about prev_count changing on us
 	 */
 
-	prev = local64_read(&hwc->prev_count);
-	rdpmcl(hwc->event_base_rdpmc, new);
-	local64_set(&hwc->prev_count, new);
-	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-	delta >>= COUNTER_SHIFT;
+	if (is_rapl_event(event)) {
+		shift = RAPL_CNTR_WIDTH;
+again:
+		prev = local64_read(&hwc->prev_count);
+		rdmsrl(hwc->event_base, new);
+		if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) {
+			cpu_relax();
+			goto again;
+		}
+		delta = (new << shift) - (prev << shift);
+		delta >>= shift;
+		delta = rapl_scale(delta);
+	} else {
+		prev = local64_read(&hwc->prev_count);
+		rdpmcl(hwc->event_base_rdpmc, new);
+		local64_set(&hwc->prev_count, new);
+		delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+		delta >>= COUNTER_SHIFT;
+	}
 	local64_add(delta, &event->count);
 }
 
 static void amd_uncore_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	u64 new;
 
 	if (flags & PERF_EF_RELOAD)
 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
 
+	if (is_rapl_event(event)) {
+		rdmsrl(hwc->event_base, new);
+		local64_set(&hwc->prev_count, new);
+	}
+
 	hwc->state = 0;
-	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+	if (hwc->config_base)
+		wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
 	perf_event_update_userpage(event);
 }
 
@@ -114,7 +154,8 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base, hwc->config);
+	if (hwc->config_base)
+		wrmsrl(hwc->config_base, hwc->config);
 	hwc->state |= PERF_HES_STOPPED;
 
 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
@@ -153,9 +194,15 @@ static int amd_uncore_add(struct perf_event *event, int flags)
 	if (hwc->idx == -1)
 		return -EBUSY;
 
-	hwc->config_base = uncore->msr_base + (2 * hwc->idx);
-	hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
-	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+	if (is_rapl_event(event)) {
+		hwc->config_base = 0;
+		hwc->event_base = uncore->msr_base;
+		hwc->event_base_rdpmc = 0;
+	} else {
+		hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+		hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+		hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+	}
 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
 	/*
@@ -285,6 +332,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
 		active_mask = &amd_nb_active_mask;
 	else if (pmu->type == amd_llc_pmu.type)
 		active_mask = &amd_llc_active_mask;
+	else if (pmu->type == amd_rapl_pmu.type)
+		active_mask = &amd_rapl_active_mask;
 	else
 		return 0;
 
@@ -326,6 +375,69 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices,	enallslices,	"config:46");		   /* F19h L3
 DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */
 
+#define RAPL_EVENT_ATTR_STR(_name, v, str)                                      \
+static struct perf_pmu_events_attr event_attr_##v = {                           \
+	.attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),     \
+	.id             = 0,                                                    \
+	.event_str      = str,                                                  \
+};
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
+static struct attribute *attrs_empty[] = {
+	NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+	.name = "events",
+	.attrs = attrs_empty,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+	.name = "format",
+	.attrs = rapl_formats_attr,
+};
+
+static const struct attribute_group *rapl_attr_groups[] = {
+	&amd_uncore_attr_group,
+	&rapl_pmu_format_group,
+	&rapl_pmu_events_group,
+	NULL,
+};
+
+static struct attribute *rapl_events_cores[] = {
+	&event_attr_rapl_cores.attr.attr,
+	&event_attr_rapl_cores_unit.attr.attr,
+	&event_attr_rapl_cores_scale.attr.attr,
+	NULL,
+};
+
+static struct attribute_group rapl_events_cores_group = {
+	.name  = "events",
+	.attrs = rapl_events_cores,
+};
+
+
+static const struct attribute_group *rapl_attr_update[] = {
+	&rapl_events_cores_group,
+	NULL,
+};
+
+
+
 /* Common DF and NB attributes */
 static struct attribute *amd_uncore_df_format_attr[] = {
 	&format_attr_event12.attr,	/* event */
@@ -425,6 +537,21 @@ static struct pmu amd_llc_pmu = {
 	.module		= THIS_MODULE,
 };
 
+static struct pmu amd_rapl_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
+	.attr_groups	= rapl_attr_groups,
+	.attr_update	= rapl_attr_update,
+	.name		= "amd_rapl",
+	.event_init	= amd_uncore_event_init,
+	.add		= amd_uncore_add,
+	.del		= amd_uncore_del,
+	.start		= amd_uncore_start,
+	.stop		= amd_uncore_stop,
+	.read		= amd_uncore_read,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
+};
+
 static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
 {
 	return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
@@ -440,7 +567,7 @@ amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
 
 static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 {
-	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
+	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL, *uncore_rapl = NULL;
 
 	if (amd_uncore_nb) {
 		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
@@ -478,6 +605,23 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
 	}
 
+	if (amd_uncore_rapl) {
+		*per_cpu_ptr(amd_uncore_rapl, cpu) = NULL;
+		uncore_rapl = amd_uncore_alloc(cpu);
+		if (!uncore_rapl)
+			goto fail;
+		uncore_rapl->cpu = cpu;
+		uncore_rapl->num_counters = 1;
+		uncore_rapl->msr_base = MSR_AMD_CORE_ENERGY_STATUS;
+		uncore_rapl->active_mask = &amd_rapl_active_mask;
+		uncore_rapl->pmu = &amd_rapl_pmu;
+		uncore_rapl->events = amd_uncore_events_alloc(1, cpu);
+		if (!uncore_rapl->events)
+			goto fail;
+		uncore_rapl->id = -1;
+		*per_cpu_ptr(amd_uncore_rapl, cpu) = uncore_rapl;
+	}
+
 	return 0;
 
 fail:
@@ -543,6 +687,14 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
 	}
 
+	if (amd_uncore_rapl) {
+		uncore = *per_cpu_ptr(amd_uncore_rapl, cpu);
+		uncore->id = topology_core_id(cpu);
+
+		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_rapl);
+		*per_cpu_ptr(amd_uncore_rapl, cpu) = uncore;
+	}
+
 	return 0;
 }
 
@@ -577,6 +729,9 @@ static int amd_uncore_cpu_online(unsigned int cpu)
 	if (amd_uncore_llc)
 		uncore_online(cpu, amd_uncore_llc);
 
+	if (amd_uncore_rapl)
+		uncore_online(cpu, amd_uncore_rapl);
+
 	return 0;
 }
 
@@ -614,6 +769,9 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu)
 	if (amd_uncore_llc)
 		uncore_down_prepare(cpu, amd_uncore_llc);
 
+	if (amd_uncore_rapl)
+		uncore_down_prepare(cpu, amd_uncore_rapl);
+
 	return 0;
 }
 
@@ -640,6 +798,9 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
 	if (amd_uncore_llc)
 		uncore_dead(cpu, amd_uncore_llc);
 
+	if (amd_uncore_rapl)
+		uncore_dead(cpu, amd_uncore_rapl);
+
 	return 0;
 }
 
@@ -649,6 +810,7 @@ static int __init amd_uncore_init(void)
 	struct attribute **l3_attr = amd_uncore_l3_format_attr;
 	union cpuid_0x80000022_ebx ebx;
 	int ret = -ENODEV;
+	u64 msr_rapl_power_unit_bits;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -730,6 +892,29 @@ static int __init amd_uncore_init(void)
 		ret = 0;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_RAPL)) {
+		if (boot_cpu_data.x86 >= 0x19) {
+			amd_uncore_rapl = alloc_percpu(struct amd_uncore *);
+			if (!amd_uncore_rapl) {
+				ret = -ENOMEM;
+				goto fail_llc;
+			}
+
+			ret = perf_pmu_register(&amd_rapl_pmu, amd_rapl_pmu.name, -1);
+			if (ret)
+				goto fail_llc;
+			pr_info("%s PMU detected\n", amd_rapl_pmu.name);
+
+			if (rdmsrl_safe(MSR_AMD_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) {
+				pr_err("RAPL POWER unit reg read error\n");
+				ret = -1;
+			} else {
+				rapl_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+				ret = 0;
+			}
+		}
+	}
+
 	/*
 	 * Install callbacks. Core will call them for each online cpu.
 	 */
@@ -780,6 +965,12 @@ static void __exit amd_uncore_exit(void)
 		free_percpu(amd_uncore_nb);
 		amd_uncore_nb = NULL;
 	}
+
+	if (boot_cpu_has(X86_FEATURE_RAPL)) {
+		perf_pmu_unregister(&amd_rapl_pmu);
+		free_percpu(amd_uncore_rapl);
+		amd_uncore_rapl = NULL;
+	}
 }
 
 module_init(amd_uncore_init);
-- 
2.34.1
