linux-kernel - Re: perf: multiplexing broken on amd fam16h?

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 22 Jul 2015 14:22:08 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Vince Weaver <vincent.weaver@...ne.edu>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...hat.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>
Subject: Re: perf: multiplexing broken on amd fam16h?

On Tue, Jul 21, 2015 at 04:08:28PM -0400, Vince Weaver wrote:
> On Tue, 21 Jul 2015, Peter Zijlstra wrote:
> 
> > On Tue, Jul 21, 2015 at 02:14:54PM -0400, Vince Weaver wrote:
> > > Hello
> > > 
> > > I have a new AMD A8 system and perf multiplexing support is acting weird.
> > > 
> > > If I run the perf_event_tests check_multiplexing test it fails like this:
> > > 
> > > Event	TotalCount	RawCount	Scale					ScaledCount	Error
> > > 0	500282143	150403975	-2.00 (c8050b5 fffffffff9c224ef)	-300807950	160.13%
> > > 
> > > So the problem is that the TOTAL_TIME_RUNNING value is negative, which as 
> > > far as I know shouldn't be possible.  
> > 
> > Correct, that should not happen. I don't have a Fam16 machine, but I can
> > try and stare at the code a wee bit. Although there's another few perf
> > issues I am still looking into as well :/
> 
> well I've traced it back to see that in
> 	update_event_times()
> run_end is less than event->tstamp_running, which is why the result
> is negative.
> 
> [ 1279.857205] VMW: 1 run_end=1279846999345 tstamp_running=1279929016607
> [ 1279.857222] VMW: 0 run_end=1279855184453 tstamp_running=1279931111760
> 
> It seems to happen both in the PERF_EVENT_STATE_INACTIVE and 
> !PERF_EVENT_STATE_INACTIVE cases.
> 
> tracking back why those values are wrong is proving to be a bit trickier.

Can you run with something like the below?

---
 kernel/events/core.c | 79 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae3419b99..cd96bb9b1c72 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1115,15 +1115,26 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
+static void start_context_time(struct perf_event_context *ctx)
+{
+	u64 now = perf_clock();
+
+	ctx->timestamp = now;
+	trace_printk("ctx: %p now: %Lu\n", ctx, now);
+}
+
 /*
  * Update the record of the current time in a context.
  */
 static void update_context_time(struct perf_event_context *ctx)
 {
 	u64 now = perf_clock();
+	u64 delta = now - ctx->timestamp;
 
-	ctx->time += now - ctx->timestamp;
+	ctx->time += delta;
 	ctx->timestamp = now;
+	trace_printk("ctx: %p now: %Lu time: %Lu delta: %Lu\n",
+			ctx, now, ctx->time, delta);
 }
 
 static u64 perf_event_time(struct perf_event *event)
@@ -1167,6 +1178,10 @@ static void update_event_times(struct perf_event *event)
 
 	event->total_time_enabled = run_end - event->tstamp_enabled;
 
+	trace_printk("event: %p time: %Lu stopped: %Lu end: %Lu enabled: %Lu\n",
+			event, ctx->time, event->tstamp_stopped, 
+			run_end, event->tstamp_enabled);
+
 	if (event->state == PERF_EVENT_STATE_INACTIVE)
 		run_end = event->tstamp_stopped;
 	else
@@ -1174,6 +1189,9 @@ static void update_event_times(struct perf_event *event)
 
 	event->total_time_running = run_end - event->tstamp_running;
 
+	trace_printk("event: %p time: %Lu stopped: %Lu end: %Lu running: %Lu\n",
+			event, ctx->time, event->tstamp_stopped, 
+			run_end, event->tstamp_running);
 }
 
 /*
@@ -1526,21 +1544,25 @@ event_sched_out(struct perf_event *event,
 	WARN_ON_ONCE(event->ctx != ctx);
 	lockdep_assert_held(&ctx->lock);
 
-	/*
-	 * An event which could not be activated because of
-	 * filter mismatch still needs to have its timings
-	 * maintained, otherwise bogus information is return
-	 * via read() for time_enabled, time_running:
-	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE
-	    && !event_filter_match(event)) {
-		delta = tstamp - event->tstamp_stopped;
-		event->tstamp_running += delta;
-		event->tstamp_stopped = tstamp;
-	}
+	if (event->state != PERF_EVENT_STATE_ACTIVE) {
+		/*
+		 * An event which could not be activated because of
+		 * filter mismatch still needs to have its timings
+		 * maintained, otherwise bogus information is return
+		 * via read() for time_enabled, time_running:
+		 */
+		if (event->state == PERF_EVENT_STATE_INACTIVE &&
+		    !event_filter_match(event)) {
+			delta = tstamp - event->tstamp_stopped;
+			event->tstamp_running += delta;
+			event->tstamp_stopped = tstamp;
+
+			trace_printk("event: %p filter-update: time: %Lu running: %Lu delta: %Lu\n",
+					event, tstamp, event->tstamp_running, delta);
+		}
 
-	if (event->state != PERF_EVENT_STATE_ACTIVE)
 		return;
+	}
 
 	perf_pmu_disable(event->pmu);
 
@@ -1550,6 +1572,9 @@ event_sched_out(struct perf_event *event,
 		event->state = PERF_EVENT_STATE_OFF;
 	}
 	event->tstamp_stopped = tstamp;
+
+	trace_printk("event: %p time: %Lu\n", event, tstamp);
+
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
@@ -1870,6 +1895,9 @@ event_sched_in(struct perf_event *event,
 
 	event->tstamp_running += tstamp - event->tstamp_stopped;
 
+	trace_printk("event: %p time: %Lu stopped: %Lu running: %Lu\n",
+			event, tstamp, event->tstamp_stopped, event->tstamp_running);
+
 	perf_set_shadow_time(event, ctx, tstamp);
 
 	perf_log_itrace_start(event);
@@ -1907,7 +1935,7 @@ group_sched_in(struct perf_event *group_event,
 {
 	struct perf_event *event, *partial_group = NULL;
 	struct pmu *pmu = ctx->pmu;
-	u64 now = ctx->time;
+	u64 tstamp = perf_event_time(group_event);
 	bool simulate = false;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
@@ -1954,8 +1982,11 @@ group_sched_in(struct perf_event *group_event,
 			simulate = true;
 
 		if (simulate) {
-			event->tstamp_running += now - event->tstamp_stopped;
-			event->tstamp_stopped = now;
+			event->tstamp_running += tstamp - event->tstamp_stopped;
+			event->tstamp_stopped = tstamp;
+			trace_printk("event: %p simulate: time: %Lu running: %Lu stopped: %Lu\n",
+					event, tstamp, event->tstamp_running,
+					event->tstamp_stopped);
 		} else {
 			event_sched_out(event, cpuctx, ctx);
 		}
@@ -2010,6 +2041,7 @@ static void add_event_to_ctx(struct perf_event *event,
 	event->tstamp_enabled = tstamp;
 	event->tstamp_running = tstamp;
 	event->tstamp_stopped = tstamp;
+	trace_printk("event: %p time: %Lu\n", event, tstamp);
 }
 
 static void task_ctx_sched_out(struct perf_event_context *ctx);
@@ -2153,9 +2185,11 @@ perf_install_in_context(struct perf_event_context *ctx,
 
 /*
  * Put a event into inactive state and update time fields.
+ *
  * Enabling the leader of a group effectively enables all
  * the group members that aren't explicitly disabled, so we
  * have to update their ->tstamp_enabled also.
+ *
  * Note: this works for group members as well as group leaders
  * since the non-leader members' sibling_lists will be empty.
  */
@@ -2166,9 +2200,14 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	event->tstamp_enabled = tstamp - event->total_time_enabled;
+	trace_printk("event: %p time: %Lu enabled: %Lu\n",
+			event, tstamp, event->tstamp_enabled);
 	list_for_each_entry(sub, &event->sibling_list, group_entry) {
-		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
 			sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+			trace_printk("event: %p time: %Lu enabled: %Lu\n",
+					sub, tstamp, sub->tstamp_enabled);
+		}
 	}
 }
 
@@ -2741,15 +2780,13 @@ ctx_sched_in(struct perf_event_context *ctx,
 	     enum event_type_t event_type,
 	     struct task_struct *task)
 {
-	u64 now;
 	int is_active = ctx->is_active;
 
 	ctx->is_active |= event_type;
 	if (likely(!ctx->nr_events))
 		return;
 
-	now = perf_clock();
-	ctx->timestamp = now;
+	start_context_time(ctx);
 	perf_cgroup_set_timestamp(task, ctx);
 	/*
 	 * First go through the list and put on any pinned groups
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/