[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1498676232-23841-5-git-send-email-anju@linux.vnet.ibm.com>
Date: Thu, 29 Jun 2017 00:27:09 +0530
From: Anju T Sudhakar <anju@...ux.vnet.ibm.com>
To: mpe@...erman.id.au
Cc: linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
ego@...ux.vnet.ibm.com, bsingharora@...il.com, anton@...ba.org,
sukadev@...ux.vnet.ibm.com, mikey@...ling.org,
stewart@...ux.vnet.ibm.com, dja@...ens.net, eranian@...gle.com,
hemant@...ux.vnet.ibm.com, maddy@...ux.vnet.ibm.com,
anju@...ux.vnet.ibm.com
Subject: [PATCH v11 04/10] powerpc/perf: Add generic IMC pmu group and event functions
The device tree IMC driver code parses the IMC units and their events. It
passes this information to the IMC PMU code, which is placed in powerpc/perf
as "imc-pmu.c".
This patch adds a set of generic IMC PMU event functions to be
used by each IMC PMU unit. Add code to set up the format attributes and to
register IMC PMUs. Add an event_init function for nest_imc events.
Since the IMC counters' data is periodically written to a memory location,
the functions to read/update, start/stop and add/del can be generic and can
be used by all IMC PMU units.
Signed-off-by: Anju T Sudhakar <anju@...ux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hemant@...ux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@...ux.vnet.ibm.com>
---
arch/powerpc/include/asm/imc-pmu.h | 5 +
arch/powerpc/perf/Makefile | 3 +
arch/powerpc/perf/imc-pmu.c | 279 ++++++++++++++++++++++++++++++
arch/powerpc/platforms/powernv/opal-imc.c | 11 +-
4 files changed, 296 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/perf/imc-pmu.c
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 2a0239e..25d0c57 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -63,6 +63,9 @@ struct imc_events {
#define IMC_CPUMASK_ATTR 1
#define IMC_EVENT_ATTR 2
#define IMC_NULL_ATTR 3
+#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
+#define IMC_EVENT_RVALUE_MASK 0x100000000ULL
+#define IMC_NEST_EVENT_MODE 0x1fe00000000ULL
/*
* Device tree parser code detects IMC pmu support and
@@ -101,4 +104,6 @@ enum {
*/
#define IMC_DOMAIN_NEST 1
+extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b9..b29d918 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -6,6 +6,9 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o
+
+obj-$(CONFIG_HV_PERF_IMC_CTRS) += imc-pmu.o
+
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 0000000..326c9ea
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,279 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+
+/*
+ * Needed for sanity check.
+ *
+ * Table of nest IMC pmu pointers with IMC_MAX_PMUS slots. It is declared
+ * extern in asm/imc-pmu.h; nothing in this file reads or writes it.
+ */
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+/*
+ * imc_event_to_pmu : Return the imc_pmu that embeds the "struct pmu"
+ * the given perf event is attached to.
+ */
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+	struct pmu *base = event->pmu;
+
+	return container_of(base, struct imc_pmu, pmu);
+}
+
+/*
+ * Layout of perf_event_attr.config as exported through the "format"
+ * attribute group:
+ *   event  : bits 0-47
+ *   offset : bits 0-31  (matches IMC_EVENT_OFFSET_MASK)
+ *   rvalue : bit  32    (matches IMC_EVENT_RVALUE_MASK)
+ *   mode   : bits 33-40 (matches IMC_NEST_EVENT_MODE)
+ */
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+/* NULL-terminated list of the format attributes defined above */
+static struct attribute *nest_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_offset.attr,
+ &format_attr_rvalue.attr,
+ &format_attr_mode.attr,
+ NULL,
+};
+
+/* "format" group; installed into pmu->attr_groups by update_pmu_ops() */
+static struct attribute_group imc_format_group = {
+ .name = "format",
+ .attrs = nest_imc_format_attrs,
+};
+
+/*
+ * nest_imc_event_init : Validate a nest IMC event and compute the address
+ * of its counter.
+ *
+ * Returns -ENOENT when the event belongs to another pmu type, -EINVAL for
+ * sampling events, for events using any exclude_* filter, for events not
+ * bound to a cpu and for events whose config fails the sanity check, and
+ * -ENODEV when no imc_mem_info entry matches the chip of event->cpu.
+ * On success the counter address is stored in event->hw.event_base and
+ * 0 is returned.
+ */
+static int nest_imc_event_init(struct perf_event *event)
+{
+	int chip_id;
+	/*
+	 * Keep the full 64 bits of config: the rvalue (bit 32) and mode
+	 * (bits 33-40) fields live above bit 31, so checking them on a
+	 * 32-bit copy could never reject anything.
+	 */
+	u64 config = event->attr.config;
+	u64 offset = config & IMC_EVENT_OFFSET_MASK;
+	struct imc_mem_info *pcni;
+	struct imc_pmu *pmu;
+	bool flag = false;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+	/*
+	 * Sanity check for config (event offset, mode and rvalue).
+	 * mode and rvalue should be zero, if not just return.
+	 * The counter is read as one 64-bit word, so the whole word has to
+	 * fit below counter_mem_size, not just its first byte.
+	 */
+	if ((offset + sizeof(u64) > pmu->counter_mem_size) ||
+	    ((config & IMC_EVENT_RVALUE_MASK) != 0) ||
+	    ((config & IMC_NEST_EVENT_MODE) != 0))
+		return -EINVAL;
+
+	chip_id = topology_physical_package_id(event->cpu);
+	pcni = pmu->mem_info;
+	/*
+	 * NOTE(review): "pcni" is only ever incremented, so the loop
+	 * condition below cannot become false; when no entry matches
+	 * chip_id the walk runs past the end of pmu->mem_info. The way
+	 * that array is terminated is not visible from this file, so the
+	 * loop is left as is -- it needs a real bound.
+	 */
+	do {
+		if (pcni->id == chip_id) {
+			flag = true;
+			break;
+		}
+		pcni++;
+	} while (pcni);
+	if (!flag)
+		return -ENODEV;
+	/*
+	 * Memory for Nest HW counter data could be in multiple pages.
+	 * Hence check and pick the right event base page for chip with
+	 * "chip_id" and add the in-page part of the offset to it.
+	 */
+	event->hw.event_base = (u64)pcni->vbase[offset / PAGE_SIZE] +
+			       (offset & ~PAGE_MASK);
+	return 0;
+}
+
+/*
+ * imc_read_counter : Record the current counter value as the baseline.
+ *
+ * IMC counters are free flowing, so the value found at
+ * event->hw.event_base is converted from big endian and stored in
+ * hw.prev_count; a later update computes the delta against it.
+ */
+static void imc_read_counter(struct perf_event *event)
+{
+	u64 *counter = (u64 *)event->hw.event_base;
+	u64 snapshot = __be64_to_cpu(READ_ONCE(*counter));
+
+	local64_set(&event->hw.prev_count, snapshot);
+}
+
+/*
+ * imc_perf_event_update : Fold the counter movement since the previous
+ * snapshot into the event count.
+ */
+static void imc_perf_event_update(struct perf_event *event)
+{
+	u64 *counter = (u64 *)event->hw.event_base;
+	u64 before = local64_read(&event->hw.prev_count);
+	u64 now = __be64_to_cpu(READ_ONCE(*counter));
+
+	/*
+	 * The tool side may read the counter periodically, so the snapshot
+	 * is moved forward on every update; otherwise the same delta would
+	 * be accounted more than once.
+	 */
+	local64_set(&event->hw.prev_count, now);
+
+	/* Account the delta in the event count */
+	local64_add(now - before, &event->count);
+}
+
+/*
+ * imc_event_start : Begin counting for "event" by taking the baseline
+ * snapshot. "flags" is not used. Installed as the pmu ->start callback by
+ * update_pmu_ops() and also called from imc_event_add().
+ */
+static void imc_event_start(struct perf_event *event, int flags)
+{
+ /*
+ * In Memory Counters are free flowing counters. HW or the microcode
+ * keeps adding to the counter offset in memory. To get event
+ * counter value, we snapshot the value here and we calculate
+ * delta at later point.
+ */
+ imc_read_counter(event);
+}
+
+/*
+ * imc_event_stop : Stop counting for "event" by accounting the delta since
+ * the last snapshot. "flags" is not used. update_pmu_ops() installs this
+ * function as both the pmu ->stop and the pmu ->del callback.
+ */
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_perf_event_update(event);
+}
+
+/*
+ * imc_event_add : pmu ->add callback. Starts the event right away when
+ * PERF_EF_START is set in "flags"; always returns 0.
+ */
+static int imc_event_add(struct perf_event *event, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	imc_event_start(event, flags);
+	return 0;
+}
+
+/*
+ * update_pmu_ops : Fill in the perf callbacks and attribute groups of "pmu".
+ *
+ * Returns -EINVAL when "pmu" is NULL, 0 otherwise.
+ */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+	struct pmu *ops;
+
+	if (!pmu)
+		return -EINVAL;
+
+	ops = &pmu->pmu;
+
+	ops->task_ctx_nr = perf_invalid_context;
+	ops->event_init = nest_imc_event_init;
+
+	/* add/del and start/stop pairs; del shares the stop handler */
+	ops->add = imc_event_add;
+	ops->del = imc_event_stop;
+	ops->start = imc_event_start;
+	ops->stop = imc_event_stop;
+	ops->read = imc_perf_event_update;
+
+	/* Hook the static "format" group in and publish the group table */
+	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+	ops->attr_groups = pmu->attr_groups;
+
+	return 0;
+}
+
+/*
+ * dev_str_attr : Allocate a read-only (0444) event attribute called "name"
+ * whose event string is "str".
+ *
+ * Neither string is copied; the attribute keeps the pointers it is given.
+ * Returns the embedded struct attribute, or NULL if the allocation fails.
+ */
+static struct attribute *dev_str_attr(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *ev_attr = kzalloc(sizeof(*ev_attr),
+						       GFP_KERNEL);
+
+	if (!ev_attr)
+		return NULL;
+
+	sysfs_attr_init(&ev_attr->attr.attr);
+
+	ev_attr->event_str = str;
+	ev_attr->attr.attr.name = name;
+	ev_attr->attr.attr.mode = 0444;
+	ev_attr->attr.show = perf_event_sysfs_show;
+
+	return &ev_attr->attr.attr;
+}
+
+/*
+ * update_events_in_group: Build the "events" attribute group from "events"
+ * and store it in pmu->attr_groups[IMC_EVENT_ATTR].
+ *
+ * @events: array of "idx" event descriptions, or NULL when the pmu has none.
+ * @idx:    number of entries in "events".
+ * @pmu:    pmu that receives the attribute group.
+ *
+ * Returns 0 on success (including the "no events" case, where nothing is
+ * allocated) and -ENOMEM when the group or its attribute array cannot be
+ * allocated. An event whose attribute cannot be allocated is skipped.
+ */
+static int update_events_in_group(struct imc_events *events,
+				   int idx, struct imc_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs, *dev_str;
+	int i, j = 0;
+
+	/* If there is no events for this pmu, just return zero */
+	if (!events)
+		return 0;
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	/*
+	 * Allocate memory for attributes: one slot per event plus the NULL
+	 * terminator. kcalloc() zeroes the array and checks the
+	 * multiplication for overflow.
+	 */
+	attrs = kcalloc(idx + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs) {
+		kfree(attr_group);
+		return -ENOMEM;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	for (i = 0; i < idx; i++, events++) {
+		dev_str = dev_str_attr((const char *)events->ev_name,
+				       (const char *)events->ev_value);
+		/*
+		 * The array is NULL terminated, so a failed allocation must
+		 * not leave a hole: a NULL in the middle would hide every
+		 * attribute stored after it. Pack the entries instead.
+		 */
+		if (!dev_str)
+			continue;
+		attrs[j++] = dev_str;
+	}
+
+	/* Save the event attribute */
+	pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+	return 0;
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @events:	events memory for this pmu.
+ * @idx:	number of event entries created.
+ * @pmu_ptr:	memory allocated for this pmu.
+ *
+ * Returns 0 on success. Returns -EINVAL when "pmu_ptr" is NULL, otherwise
+ * the error reported by the failing step. On failure everything this
+ * function allocated for the "events" attribute group is released;
+ * "events" and "pmu_ptr" themselves stay owned by the caller.
+ */
+int init_imc_pmu(struct imc_events *events, int idx,
+		 struct imc_pmu *pmu_ptr)
+{
+	const struct attribute_group *ev_group;
+	struct attribute **attr;
+	int ret;
+
+	/* The error path below dereferences pmu_ptr, so reject NULL first */
+	if (!pmu_ptr)
+		return -EINVAL;
+
+	ret = update_events_in_group(events, idx, pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = update_pmu_ops(pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+	if (ret)
+		goto err_free;
+
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu_ptr->pmu.name);
+
+	return 0;
+
+err_free:
+	/* Only free the attr_groups which are dynamically allocated */
+	ev_group = pmu_ptr->attr_groups[IMC_EVENT_ATTR];
+	if (ev_group) {
+		/*
+		 * Every entry was allocated by dev_str_attr() as a
+		 * struct perf_pmu_events_attr; release those before the
+		 * array that points at them.
+		 */
+		for (attr = ev_group->attrs; attr && *attr; attr++)
+			kfree(container_of(*attr, struct perf_pmu_events_attr,
+					   attr.attr));
+		kfree(ev_group->attrs);
+		kfree(ev_group);
+		/* Do not leave a dangling pointer behind for the caller */
+		pmu_ptr->attr_groups[IMC_EVENT_ATTR] = NULL;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 839c257..a68d66d 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -34,8 +34,6 @@
#include <asm/cputable.h>
#include <asm/imc-pmu.h>
-struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
-
static int imc_event_prop_update(char *name, struct imc_events *events)
{
char *buf;
@@ -452,8 +450,17 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
if (prop)
imc_events_setup(parent, pmu_index, pmu_ptr, prop, &idx);
}
+ /* Function to register IMC pmu */
+ ret = init_imc_pmu(pmu_ptr->events, idx, pmu_ptr);
+ if (ret) {
+ pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+ goto free_events;
+ }
return 0;
+free_events:
+ if (pmu_ptr->events)
+ imc_free_events(pmu_ptr->events, idx);
free_pmu:
if (pmu_ptr)
kfree(pmu_ptr);
--
2.7.4
Powered by blists - more mailing lists