[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1298865151-23656-4-git-send-email-daahern@cisco.com>
Date: Sun, 27 Feb 2011 20:52:28 -0700
From: David Ahern <daahern@...co.com>
To: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: acme@...stprotocols.net, mingo@...e.hu, peterz@...radead.org,
fweisbec@...il.com, paulus@...ba.org, tglx@...utronix.de,
David Ahern <daahern@...co.com>
Subject: [PATCH 3/6] perf record: add time-of-day option
Enable data collection for generating time-of-day strings when
printing individual perf samples. This is done by sampling the
realtime clock event with the perf_clock time stamps.
If the realtime-clock event is not available (e.g., on older kernels),
fall back to a synthesized event. (I realize there is resistance
to new synthesized events, but it is a simple way to gain this
feature on older kernels without the need to modify the kernel
code).
Signed-off-by: David Ahern <daahern@...co.com>
---
include/linux/perf_event.h | 1 +
kernel/perf_event.c | 19 +++++
tools/perf/Documentation/perf-record.txt | 5 ++
tools/perf/builtin-record.c | 110 +++++++++++++++++++++++++++++-
tools/perf/util/event.c | 1 +
tools/perf/util/event.h | 8 ++
tools/perf/util/evlist.c | 2 +-
tools/perf/util/evlist.h | 2 +
tools/perf/util/session.c | 4 +
tools/perf/util/session.h | 3 +-
10 files changed, 151 insertions(+), 4 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 51a2f34..404b1ee 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -240,6 +240,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_RECORD_SAMPLE _IO('$', 7)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a25a63d..7999f55 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3250,6 +3250,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed)
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_event_generate_sample(struct perf_event *event);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -3296,6 +3297,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);
+ case PERF_EVENT_IOC_RECORD_SAMPLE:
+ return perf_event_generate_sample(event);
+
default:
return -ENOTTY;
}
@@ -4398,6 +4402,21 @@ exit:
rcu_read_unlock();
}
+/*
+ * Add a sample to the event stream based on user request.
+ *
+ * Reached from perf_ioctl() for PERF_EVENT_IOC_RECORD_SAMPLE. Captures the
+ * caller's registers, has the PMU read the event, and outputs one sample
+ * whose period is the event's last recorded period. Always returns 0.
+ */
+static int perf_event_generate_sample(struct perf_event *event)
+{
+ struct perf_sample_data data;
+ struct pt_regs regs;
+
+ perf_fetch_caller_regs(&regs);
+ event->pmu->read(event);
+ perf_sample_data_init(&data, 0);
+ data.period = event->hw.last_period;
+ perf_event_output(event, 0, &data, &regs);
+
+ return 0;
+}
+
/*
* read event_id
*/
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5a520f8..8eb5b0a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -148,6 +148,11 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
corresponding events, i.e., they always refer to events defined earlier on the command
line.
+--tod::
+Collect data for time-of-day strings when printing events. This option adds
+reference time samples to the event stream for converting perf timestamps to
+time-of-day.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e39883e..ac0717c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -56,6 +56,8 @@ static bool nodelay = false;
static bool raw_samples = false;
static bool sample_id_all_avail = true;
static bool system_wide = false;
+static bool want_tod_data = false;
+static bool synth_reftime = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
static pid_t child_pid = -1;
@@ -235,7 +237,7 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (evlist->nr_entries > 1)
+ if ((evlist->nr_entries > 1) || want_tod_data)
attr->sample_type |= PERF_SAMPLE_ID;
/*
@@ -280,6 +282,12 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
attr->sample_type |= PERF_SAMPLE_CPU;
}
+ if (want_tod_data) {
+ attr->sample_type |= PERF_SAMPLE_TIME;
+ attr->sample_type |= PERF_SAMPLE_CPU;
+ attr->sample_type |= PERF_SAMPLE_READ;
+ }
+
if (nodelay) {
attr->watermark = 0;
attr->wakeup_events = 1;
@@ -294,6 +302,88 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
}
}
+static int perf_event__synthesize_reftime(perf_event__handler_t process,
+ struct perf_session *psession)
+{
+ union perf_event ev; /* only the reftime member is filled in */
+ struct timespec tp;
+ /*
+  * Build one PERF_RECORD_REFTIME event that pairs a gettimeofday()
+  * reading with a CLOCK_MONOTONIC reading in nanoseconds and hand it
+  * to process(). Returns -1 if either clock call fails, otherwise
+  * whatever process() returns.
+  */
+ memset(&ev, 0, sizeof(ev));
+
+ /* the two clocks are read by separate calls, so the pair is slightly skewed; should be close enough */
+ if (gettimeofday(&ev.reftime.tv, NULL) != 0) {
+ error("gettimeofday failed. Cannot generate reference time.\n");
+ return -1;
+ }
+ if (clock_gettime(CLOCK_MONOTONIC, &tp) != 0) {
+ error("clock_gettime failed. Cannot generate reference time.\n");
+ return -1;
+ }
+ ev.reftime.nsec = (u64) tp.tv_sec * NSEC_PER_SEC + (u64) tp.tv_nsec; /* seconds + nanoseconds folded into one ns count */
+ /* NOTE(review): presumably the monotonic value stands in for the perf timestamp clock - confirm */
+ ev.header.type = PERF_RECORD_REFTIME;
+ ev.header.size = sizeof(ev.reftime); /* size of the reftime record, not of the whole union */
+
+ return process(&ev, NULL, psession);
+}
+
+/*
+ * Open a realtime-clock software event on CPU 0 and attach it to the first
+ * event's output stream, so the recorded data carries reference samples
+ * for converting perf timestamps to time-of-day.
+ *
+ * If opening the event fails with EINVAL, set synth_reftime and return so
+ * that a synthesized reference-time event is used instead. Every other
+ * failure is fatal (die()).
+ */
+static void create_tod_counter(void)
+{
+ int fd;
+ struct perf_event_attr attr;
+ struct perf_evsel *evsel;
+ /* only on 1 cpu */
+ struct cpu_map *cpus = cpu_map__new("0");
+ /* not associated with a process */
+ struct thread_map *threads = thread_map__new(-1, -1);
+
+ struct perf_evsel *first_evsel = list_entry(evsel_list->entries.next,
+ struct perf_evsel, node);
+
+ /* both maps are dereferenced below (cpus->nr, threads->nr) */
+ if (!cpus || !threads)
+ die("Error: Failed to allocate memory for time counter\n");
+
+ /* start from the first event's attributes, then retarget to the realtime clock */
+ attr = first_evsel->attr;
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_REALTIME_CLOCK;
+ attr.sample_period = 3600 * NSEC_PER_SEC; /* 3600 seconds expressed in nanoseconds */
+ attr.freq = 0;
+
+ evsel = perf_evsel__new(&attr, evsel_list->nr_entries);
+ if (!evsel)
+ die("Error: Failed to allocate memory for time counter\n");
+
+ config_attr(evsel, evsel_list);
+
+ if (perf_evsel__open(evsel, cpus, threads, 0, 1) < 0) {
+ if (errno == EINVAL) {
+ if (verbose)
+ warning("Failed to open realtime clock event\n");
+ /* NOTE(review): cpus, threads and evsel are not released on this path */
+ synth_reftime = true;
+ return;
+ }
+ die("Failed to open realtime clock event\n");
+ }
+
+ if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
+ die("Failed to allocate an id for realtime-clock event\n");
+
+ fd = FD(evsel, 0, 0);
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, FD(first_evsel, 0, 0)) != 0)
+ die("Failed to add realtime-clock event to output stream\n");
+
+ if (perf_evlist__id_hash(evsel_list, evsel, 0, 0, fd) < 0)
+ die("id_hash failed for realtime-clock event\n");
+
+ create_counter(evsel, 0);
+
+ /* generate first sample - want a sample immediately so
+ * that time conversions are available from the get-go.
+ * Let user-specified rate take care of samples after that.
+ */
+ if (ioctl(fd, PERF_EVENT_IOC_RECORD_SAMPLE) != 0)
+ error("failed to generate sample for realtime clock\n");
+
+ return;
+}
+
static void open_counters(struct perf_evlist *evlist)
{
struct perf_evsel *pos;
@@ -335,7 +425,8 @@ try_again:
* Old kernel, no attr->sample_id_type_all field
*/
sample_id_all_avail = false;
- if (!sample_time && !raw_samples && !time_needed)
+ if (!sample_time && !raw_samples
+ && !time_needed && !want_tod_data)
attr->sample_type &= ~PERF_SAMPLE_TIME;
goto retry_sample_id;
@@ -378,6 +469,9 @@ try_again:
list_for_each_entry(pos, &evlist->entries, node)
create_counter(pos, cpu);
}
+
+ if (want_tod_data)
+ create_tod_counter();
}
static int process_buildids(void)
@@ -657,6 +751,16 @@ static int __cmd_record(int argc, const char **argv)
}
}
+ if (synth_reftime) {
+ if (verbose)
+ warning(" ... fall back to synthesized reftime\n");
+
+ if (perf_event__synthesize_reftime(process_synthesized_event,
+ session) != 0)
+ error("Failed to create reftime event. "
+ "Cannot generate wall-clock timestamps\n");
+ }
+
machine = perf_session__find_host_machine(session);
if (!machine) {
pr_err("Couldn't find native kernel information.\n");
@@ -815,6 +919,8 @@ const struct option record_options[] = {
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
"monitor event in cgroup name only",
parse_cgroups),
+ OPT_BOOLEAN(0, "tod", &want_tod_data,
+ "collect data for time-of-day strings"),
OPT_END()
};
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fbf5754..6bbd551 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
+ [PERF_RECORD_REFTIME] = "REF_TIME",
};
const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 512a1ca..d4810e0 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -99,6 +99,7 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_TRACING_DATA = 66,
PERF_RECORD_HEADER_BUILD_ID = 67,
PERF_RECORD_FINISHED_ROUND = 68,
+ PERF_RECORD_REFTIME = 69,
PERF_RECORD_HEADER_MAX
};
@@ -125,6 +126,12 @@ struct tracing_data_event {
u32 size;
};
+struct reftime_event { /* payload of a PERF_RECORD_REFTIME user event */
+ struct perf_event_header header;
+ struct timeval tv; /* wall-clock time as filled in by gettimeofday() */
+ u64 nsec; /* CLOCK_MONOTONIC reading in nanoseconds, taken right after tv */
+};
+
union perf_event {
struct perf_event_header header;
struct ip_event ip;
@@ -138,6 +145,7 @@ union perf_event {
struct event_type_event event_type;
struct tracing_data_event tracing_data;
struct build_id_event build_id;
+ struct reftime_event reftime;
};
void perf_event__print_totals(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 95b21fe..bb49243 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -106,7 +106,7 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
evlist->nr_fds++;
}
-static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel,
+int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, int fd)
{
struct perf_sample_id *sid;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c988405..bd73572 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -48,6 +48,8 @@ union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
+int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, int fd);
static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
struct cpu_map *cpus,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index dc0235b..1ef8e8a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -791,6 +791,10 @@ static int perf_session__process_user_event(struct perf_session *session, union
return ops->build_id(event, session);
case PERF_RECORD_FINISHED_ROUND:
return ops->finished_round(event, session, ops);
+ case PERF_RECORD_REFTIME:
+ if (ops->reftime)
+ return ops->reftime(event, session);
+ return -EINVAL;
default:
return -EINVAL;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 212f810..b46672a 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -78,7 +78,8 @@ struct perf_event_ops {
event_synth_op attr,
event_type,
tracing_data,
- build_id;
+ build_id,
+ reftime;
event_op2 finished_round;
bool ordered_samples;
bool ordering_requires_timestamps;
--
1.7.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists