Message-ID: <20150326055705.GA5593@us.ibm.com>
Date: Wed, 25 Mar 2015 22:57:05 -0700
From: Sukadev Bhattiprolu <sukadev@...ux.vnet.ibm.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Michael Ellerman <mpe@...erman.id.au>,
Paul Mackerras <paulus@...ba.org>, dev@...yps.com,
linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org
Subject: Re: [PATCH 4/4] perf/powerpc: Implement group_read() txn interface
for 24x7 counters
Peter Zijlstra [peterz@...radead.org] wrote:
|
| Is there a down-side to always doing the txn based group read? If an
| arch does not implement the read txn support it'll fall back to doing
| independent read ops, but we end up doing those anyway.
|
| That way we get less special case code.
We could, but we would need to move the perf_event_read() earlier in
perf_event_read_group(). Would something like the patch below work?
(It could be broken into two patches, but I have merged them here for
easier review.)
----
perf_event_read_value() mostly computes the event count and the
enabled and running times. Move the perf_event_read() into the
callers and rename perf_event_read_value() to
perf_event_compute_values(). Then, in perf_event_read_group(), read
the event counts using the transaction interface for all PMUs.
----
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d8..5896cb1 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -144,9 +144,11 @@ static u64 read_pmc(struct kvm_pmc *pmc)
counter = pmc->counter;
- if (pmc->perf_event)
- counter += perf_event_read_value(pmc->perf_event,
+ if (pmc->perf_event) {
+ perf_event_read(pmc->perf_event);
+ counter += perf_event_compute_values(pmc->perf_event,
&enabled, &running);
+ }
/* FIXME: Scaling needed? */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8fe60e..1e30560 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -579,7 +579,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
int src_cpu, int dst_cpu);
-extern u64 perf_event_read_value(struct perf_event *event,
+extern u64 perf_event_compute_values(struct perf_event *event,
u64 *enabled, u64 *running);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a6abcd3..f7e4705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3643,7 +3643,27 @@ static void orphans_remove_work(struct work_struct *work)
put_ctx(ctx);
}
-u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
+static int perf_event_read_values(struct perf_event *leader)
+{
+ int ret;
+ struct perf_event *sub;
+ struct pmu *pmu;
+
+ pmu = leader->pmu;
+
+ pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+ pmu->read(leader);
+ list_for_each_entry(sub, &leader->sibling_list, group_entry)
+ pmu->read(sub);
+
+ ret = pmu->commit_txn(pmu, PERF_PMU_TXN_READ);
+
+ return ret;
+}
+
+u64 perf_event_compute_values(struct perf_event *event, u64 *enabled,
+ u64 *running)
{
struct perf_event *child;
u64 total = 0;
@@ -3653,7 +3673,6 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
mutex_lock(&event->child_mutex);
- perf_event_read(event);
total += perf_event_count(event);
*enabled += event->total_time_enabled +
@@ -3671,7 +3690,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
return total;
}
-EXPORT_SYMBOL_GPL(perf_event_read_value);
+EXPORT_SYMBOL_GPL(perf_event_compute_values);
static int perf_event_read_group(struct perf_event *event,
u64 read_format, char __user *buf)
@@ -3684,7 +3703,11 @@ static int perf_event_read_group(struct perf_event *event,
lockdep_assert_held(&ctx->mutex);
- count = perf_event_read_value(leader, &enabled, &running);
+ ret = perf_event_read_values(leader);
+ if (ret)
+ return ret;
+
+ count = perf_event_compute_values(leader, &enabled, &running);
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -3705,7 +3728,7 @@ static int perf_event_read_group(struct perf_event *event,
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
n = 0;
- values[n++] = perf_event_read_value(sub, &enabled, &running);
+ values[n++] = perf_event_compute_values(sub, &enabled, &running);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
@@ -3728,7 +3751,8 @@ static int perf_event_read_one(struct perf_event *event,
u64 values[4];
int n = 0;
- values[n++] = perf_event_read_value(event, &enabled, &running);
+ perf_event_read(event);
+ values[n++] = perf_event_compute_values(event, &enabled, &running);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
--
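For illustration only, not part of the patch: below is a standalone,
userspace-only toy model of the calling sequence above. It is meant to
show why always going through the txn-based group read is safe even for
PMUs that do not implement PERF_PMU_TXN_READ: with no-op start_txn()
and commit_txn() hooks the sequence degenerates into the same
independent ->read() calls we do today. All struct names, function
names and flag values below are made up for the sketch; they are not
the kernel's.

/*
 * Toy model, not kernel code: a "PMU" without txn support gets no-op
 * start_txn()/commit_txn() hooks, so the txn-based group read below is
 * equivalent to reading each counter independently.
 */
#include <stdio.h>

#define TOY_TXN_READ    0x2             /* stand-in for PERF_PMU_TXN_READ */

struct toy_event;

struct toy_pmu {
        void (*start_txn)(struct toy_pmu *pmu, unsigned int flags);
        void (*read)(struct toy_event *event);
        int  (*commit_txn)(struct toy_pmu *pmu, unsigned int flags);
};

struct toy_event {
        struct toy_pmu          *pmu;
        unsigned long long      count;
        int                     hw_idx;
};

/* Fallback hooks: no txn support, each ->read() is an independent read. */
static void fallback_start_txn(struct toy_pmu *pmu, unsigned int flags) { }
static int fallback_commit_txn(struct toy_pmu *pmu, unsigned int flags)
{
        return 0;
}

static void fallback_read(struct toy_event *event)
{
        event->count = 1000 + event->hw_idx;    /* pretend hardware read */
}

static struct toy_pmu fallback_pmu = {
        .start_txn      = fallback_start_txn,
        .read           = fallback_read,
        .commit_txn     = fallback_commit_txn,
};

/* Roughly the shape of the proposed perf_event_read_values(). */
static int toy_group_read(struct toy_event **events, int n)
{
        struct toy_pmu *pmu = events[0]->pmu;
        int i;

        pmu->start_txn(pmu, TOY_TXN_READ);
        for (i = 0; i < n; i++)
                pmu->read(events[i]);
        return pmu->commit_txn(pmu, TOY_TXN_READ);
}

int main(void)
{
        struct toy_event e0 = { .pmu = &fallback_pmu, .hw_idx = 0 };
        struct toy_event e1 = { .pmu = &fallback_pmu, .hw_idx = 1 };
        struct toy_event *group[] = { &e0, &e1 };

        if (toy_group_read(group, 2) == 0)
                printf("counts: %llu %llu\n", e0.count, e1.count);
        return 0;
}

A PMU with real transaction support (such as the 24x7 counters this
series targets) could instead queue the requests from ->read() and do
one batched read, for example a single hypercall, in commit_txn().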