[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1238508032.8530.278.camel@twins>
Date: Tue, 31 Mar 2009 16:00:32 +0200
From: Peter Zijlstra <a.p.zijlstra@...llo.nl>
To: Paul Mackerras <paulus@...ba.org>
Cc: Corey Ashford <cjashfor@...ux.vnet.ibm.com>,
Ingo Molnar <mingo@...e.hu>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 13/15] perf_counter: provide generic callchain bits
On Tue, 2009-03-31 at 20:12 +1100, Paul Mackerras wrote:
> Corey Ashford writes:
>
> > If this is the case, what is the exact meaning of PERF_COUNTER_SIMPLE
> > now? and PERF_COUNTER_GROUP? One simplification would be that reading
> > the fd of a group leader would always read up all of the counters in the
> > group (along with their enabled and running times if those bits are
> > set), and that reading a single counter's fd would yield only that
> > counter's values and times (if enabled). In effect, these two values,
> > PERF_COUNTER_GROUP and PERF_COUNTER_SIMPLE would no longer be necessary
> > at all. Other bits would be used to determine what's in the mmap'd samples.
>
> Now that events are only read through mmap, it's quite simple -
> hw_event.read_format controls what read() gives you, and
> hw_event.record_type controls what gets put into the pages that you
> get with mmap.
>
> Currently read_format and record_type don't use the same set of bit
> definitions. Maybe they should? I don't have a strong feeling about
> it, but that might be a nice simplification.
Something like the below?
That still has the record and read things separate, but as one unified
overflow output.
I reduced record and read fields to 32bits, as the output type only has
32bits.
diff did a wonderful job making the patch unreadable :/
---
Subject:
From: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Date: Tue Mar 31 15:13:36 CEST 2009
Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
include/linux/perf_counter.h | 37 +++++++---------
kernel/perf_counter.c | 99 ++++++++++++++++---------------------------
2 files changed, 57 insertions(+), 79 deletions(-)
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -73,15 +73,6 @@ enum sw_event_ids {
PERF_SW_EVENTS_MAX = 7,
};
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
- PERF_RECORD_SIMPLE = 0,
- PERF_RECORD_IRQ = 1,
- PERF_RECORD_GROUP = 2,
-};
-
#define __PERF_COUNTER_MASK(name) \
(((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \
PERF_COUNTER_##name##_SHIFT)
@@ -103,6 +94,17 @@ enum perf_counter_record_type {
#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT)
/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_record_format {
+ PERF_RECORD_IP = 1U << 0,
+ PERF_RECORD_TID = 1U << 1,
+ PERF_RECORD_GROUP = 1U << 2,
+ PERF_RECORD_CALLCHAIN = 1U << 3,
+};
+
+/*
* Bits that can be set in hw_event.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
@@ -125,8 +127,8 @@ struct perf_counter_hw_event {
__u64 config;
__u64 irq_period;
- __u64 record_type;
- __u64 read_format;
+ __u32 record_type;
+ __u32 read_format;
__u64 disabled : 1, /* off by default */
nmi : 1, /* NMI sampling */
@@ -137,12 +139,10 @@ struct perf_counter_hw_event {
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
- include_tid : 1, /* include the tid */
mmap : 1, /* include mmap data */
munmap : 1, /* include munmap data */
- callchain : 1, /* add callchain data */
- __reserved_1 : 51;
+ __reserved_1 : 53;
__u32 extra_config_len;
__u32 __reserved_4;
@@ -212,15 +212,14 @@ struct perf_event_header {
enum perf_event_type {
- PERF_EVENT_GROUP = 1,
-
- PERF_EVENT_MMAP = 2,
- PERF_EVENT_MUNMAP = 3,
+ PERF_EVENT_MMAP = 1,
+ PERF_EVENT_MUNMAP = 2,
PERF_EVENT_OVERFLOW = 1UL << 31,
__PERF_EVENT_IP = 1UL << 30,
__PERF_EVENT_TID = 1UL << 29,
- __PERF_EVENT_CALLCHAIN = 1UL << 28,
+ __PERF_EVENT_GROUP = 1UL << 28,
+ __PERF_EVENT_CALLCHAIN = 1UL << 27,
};
#ifdef __KERNEL__
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_
rcu_read_unlock();
}
-static void perf_output_simple(struct perf_counter *counter,
- int nmi, struct pt_regs *regs)
+void perf_counter_output(struct perf_counter *counter,
+ int nmi, struct pt_regs *regs)
{
int ret;
+ u64 record_type = counter->hw_event.record_type;
struct perf_output_handle handle;
struct perf_event_header header;
u64 ip;
struct {
u32 pid, tid;
} tid_entry;
+ struct {
+ u64 event;
+ u64 counter;
+ } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
header.type = PERF_EVENT_OVERFLOW;
header.size = sizeof(header);
- ip = instruction_pointer(regs);
- header.type |= __PERF_EVENT_IP;
- header.size += sizeof(ip);
+ if (record_type & PERF_RECORD_IP) {
+ ip = instruction_pointer(regs);
+ header.type |= __PERF_EVENT_IP;
+ header.size += sizeof(ip);
+ }
- if (counter->hw_event.include_tid) {
+ if (record_type & PERF_RECORD_TID) {
/* namespace issues */
tid_entry.pid = current->group_leader->pid;
tid_entry.tid = current->pid;
@@ -1794,7 +1801,13 @@ static void perf_output_simple(struct pe
header.size += sizeof(tid_entry);
}
- if (counter->hw_event.callchain) {
+ if (record_type & PERF_RECORD_GROUP) {
+ header.type |= __PERF_EVENT_GROUP;
+ header.size += sizeof(u64) +
+ counter->nr_siblings * sizeof(group_entry);
+ }
+
+ if (record_type & PERF_RECORD_CALLCHAIN) {
callchain = perf_callchain(regs);
if (callchain) {
@@ -1810,69 +1823,35 @@ static void perf_output_simple(struct pe
return;
perf_output_put(&handle, header);
- perf_output_put(&handle, ip);
- if (counter->hw_event.include_tid)
- perf_output_put(&handle, tid_entry);
+ if (record_type & PERF_RECORD_IP)
+ perf_output_put(&handle, ip);
- if (callchain)
- perf_output_copy(&handle, callchain, callchain_size);
-
- perf_output_end(&handle);
-}
-
-static void perf_output_group(struct perf_counter *counter, int nmi)
-{
- struct perf_output_handle handle;
- struct perf_event_header header;
- struct perf_counter *leader, *sub;
- unsigned int size;
- struct {
- u64 event;
- u64 counter;
- } entry;
- int ret;
-
- size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+ if (record_type & PERF_RECORD_TID)
+ perf_output_put(&handle, tid_entry);
- ret = perf_output_begin(&handle, counter, size, nmi);
- if (ret)
- return;
+ if (record_type & PERF_RECORD_GROUP) {
+ struct perf_counter *leader, *sub;
+ u64 nr = counter->nr_siblings;
- header.type = PERF_EVENT_GROUP;
- header.size = size;
+ perf_output_put(&handle, nr);
- perf_output_put(&handle, header);
+ leader = counter->group_leader;
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ if (sub != counter)
+ sub->hw_ops->read(sub);
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->hw_ops->read(sub);
+ group_entry.event = sub->hw_event.config;
+ group_entry.counter = atomic64_read(&sub->count);
- entry.event = sub->hw_event.config;
- entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, entry);
+ perf_output_put(&handle, group_entry);
+ }
}
- perf_output_end(&handle);
-}
-
-void perf_counter_output(struct perf_counter *counter,
- int nmi, struct pt_regs *regs)
-{
- switch (counter->hw_event.record_type) {
- case PERF_RECORD_SIMPLE:
- return;
-
- case PERF_RECORD_IRQ:
- perf_output_simple(counter, nmi, regs);
- break;
+ if (callchain)
+ perf_output_copy(&handle, callchain, callchain_size);
- case PERF_RECORD_GROUP:
- perf_output_group(counter, nmi);
- break;
- }
+ perf_output_end(&handle);
}
/*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists