linux-kernel - Re: [PATCH v6 2/3]: perf/core: use context tstamp

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c052d929-db1d-8b31-4a62-07563d48063b@linux.intel.com>
Date:   Thu, 10 Aug 2017 18:57:43 +0300
From:   Alexey Budankov <alexey.budankov@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...el.com>,
        Dmitri Prokhorov <Dmitry.Prohorov@...el.com>,
        Valery Cherepennikov <valery.cherepennikov@...el.com>,
        Mark Rutland <mark.rutland@....com>,
        Stephane Eranian <eranian@...gle.com>,
        David Carrillo-Cisneros <davidcc@...gle.com>,
        linux-kernel <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v6 2/3]: perf/core: use context tstamp_data for skipped
 events on mux interrupt

Well, Ok.

I re-implemented this patch of the patch set and also implemented a unit
test that, IMHO, mimics the case mentioned above, to check whether it solves the issue.

The test is a single-threaded application that creates 272 fds for each of
two INSTRUCTIONS_RETIRED events, covering the 272 cores of an Intel Xeon Phi.

The test simultaneously executes 1 million instructions several times
while the events are enabled and reads the events' counts, TOTAL_ENABLED and
TOTAL_RUNNING, via the read() system call.

The first event is allocated, enabled and disabled at the beginning and
the end of the test execution phase, whereas the second event's allocation and
measurements are included in the measurement interval of the first event.

Below is what I am getting now when running the test on the patched kernel:

EID	CPU	COUNT		T_ENABLED	T_RUNNING	 SCALE
--- 0 enabled ---
--- million instructions ---
0	26	1334		1666671		137366		  8.24
0	94	138637		1735872		527098		 30.37
0	162	874166		1823695		1083785		 59.43
--- million instructions ---
0	26	1334		3328832		137366		  4.13
0	94	1164146		3318599		2109825		 63.58
0	162	874166		3390716		1083785		 31.96
--- million instructions ---
0	26	1334		4835955		137366		  2.84
0	94	2189671		4820750		3611976		 74.93
0	162	874166		4934513		1083785		 21.96
--- 1 enabled ---
--- million instructions ---
0	26	1334		22661310	137366		  0.61
0	94	3248450		22667165	21458391	 94.67
0	162	874166		22742990	1083785		  4.77
1	94	1033387		2150307		2150307		100.00
--- million instructions ---
0	26	1334		24878504	137366		  0.55
0	94	4289784		24869644	23660870	 95.14
0	162	874166		24943564	1083785		  4.34
1	94	2074675		4370708		4370708		100.00
--- 1 disabled ---
--- million instructions ---
0	26	1334		27681611	137366		  0.50
0	94	5337752		27675968	26467194	 95.63
0	162	874166		27749528	1083785		  3.91
1	94	2089278		5024218		5024218		100.00
--- 0 disabled ---
--- million instructions ---
0	26	1334		29004868	137366		  0.47
0	94	5372734		28964075	27755301	 95.83
0	162	874166		29005751	1083785		  3.74
1	94	2089278		5024218		5024218		100.00

The output demonstrates that the test thread migrated two times during execution,
thus several fds were employed for measuring the number of executed instructions.

Also, the output shows that the T_RUNNING values of the events are updated and
maintained so that the sum of SCALE values for every event is near 100%
(no multiplexing) after every million instructions executed.

Unit test code is attached for convenience.

The key thing in the patch is explicit updating of tstamp fields for
INACTIVE events in update_event_times().

Signed-off-by: Alexey Budankov <alexey.budankov@...ux.intel.com>
---
kernel/events/core.c | 52 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0a4f619..d195fdc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1391,6 +1391,27 @@ static u64 perf_event_time(struct perf_event *event)
 	return ctx ? ctx->time : 0;
 }
 
+void perf_event_tstamp_init(struct perf_event *event)
+{
+	u64 tstamp = perf_event_time(event);
+
+	event->tstamp_enabled = tstamp;
+	event->tstamp_running = tstamp;
+	event->tstamp_stopped = tstamp;
+}
+
+void perf_event_tstamp_update(struct perf_event *event)
+{
+	u64 tstamp, delta;
+
+	tstamp = perf_event_time(event);
+
+	delta = tstamp - event->tstamp_stopped;
+
+	event->tstamp_running += delta;
+	event->tstamp_stopped = tstamp;
+}
+
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
  */
@@ -1405,6 +1426,9 @@ static void update_event_times(struct perf_event *event)
 	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
 		return;
 
+	if (event->state == PERF_EVENT_STATE_INACTIVE)
+		perf_event_tstamp_update(event);
+
 	/*
 	 * in cgroup mode, time_enabled represents
 	 * the time the event was enabled AND active
@@ -1430,7 +1454,6 @@ static void update_event_times(struct perf_event *event)
 		run_end = perf_event_time(event);
 
 	event->total_time_running = run_end - event->tstamp_running;
-
 }
 
 /*
@@ -1954,9 +1977,6 @@ event_sched_out(struct perf_event *event,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_event_context *ctx)
 {
-	u64 tstamp = perf_event_time(event);
-	u64 delta;
-
 	WARN_ON_ONCE(event->ctx != ctx);
 	lockdep_assert_held(&ctx->lock);
 
@@ -1967,18 +1987,15 @@ event_sched_out(struct perf_event *event,
 	 * via read() for time_enabled, time_running:
 	 */
 	if (event->state == PERF_EVENT_STATE_INACTIVE &&
-	    !event_filter_match(event)) {
-		delta = tstamp - event->tstamp_stopped;
-		event->tstamp_running += delta;
-		event->tstamp_stopped = tstamp;
-	}
+	    !event_filter_match(event))
+		perf_event_tstamp_update(event);
 
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
 		return;
 
 	perf_pmu_disable(event->pmu);
 
-	event->tstamp_stopped = tstamp;
+	event->tstamp_stopped = perf_event_time(event);
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 	event->state = PERF_EVENT_STATE_INACTIVE;
@@ -2294,7 +2311,6 @@ group_sched_in(struct perf_event *group_event,
 {
 	struct perf_event *event, *partial_group = NULL;
 	struct pmu *pmu = ctx->pmu;
-	u64 now = ctx->time;
 	bool simulate = false;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
@@ -2340,12 +2356,10 @@ group_sched_in(struct perf_event *group_event,
 		if (event == partial_group)
 			simulate = true;
 
-		if (simulate) {
-			event->tstamp_running += now - event->tstamp_stopped;
-			event->tstamp_stopped = now;
-		} else {
+		if (simulate)
+			perf_event_tstamp_update(event);
+		else
 			event_sched_out(event, cpuctx, ctx);
-		}
 	}
 	event_sched_out(group_event, cpuctx, ctx);
 
@@ -2390,13 +2404,9 @@ static int group_can_go_on(struct perf_event *event,
 static void add_event_to_ctx(struct perf_event *event,
 			       struct perf_event_context *ctx)
 {
-	u64 tstamp = perf_event_time(event);
-
 	list_add_event(event, ctx);
 	perf_group_attach(event);
-	event->tstamp_enabled = tstamp;
-	event->tstamp_running = tstamp;
-	event->tstamp_stopped = tstamp;
+	perf_event_tstamp_init(event);
 }
 
 static void ctx_sched_out(struct perf_event_context *ctx,

View attachment "check_multiplexing_read.c" of type "text/plain" (3294 bytes)