Date:   Mon, 14 Mar 2022 18:24:19 +0200
From:   Adrian Hunter <adrian.hunter@...el.com>
To:     "Steinar H. Gunderson" <sesse@...gle.com>
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] perf intel-pt: Synthesize cycle events

On 11/03/2022 19:42, Steinar H. Gunderson wrote:
> On Fri, Mar 11, 2022 at 11:10:30AM +0200, Adrian Hunter wrote:
>>> @@ -1633,7 +1639,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
>>>  	else
>>>  		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
>>>  
>>> -	if (ptq->sample_ipc)
>>> +	if (ptq->sample_ipc || pt->sample_cycles)
>> This is not quite right.  ptq->sample_ipc is set to indicate when the
>> cycle count is accurate for the current instruction.  It can be weakened
>> by using "Approx IPC" which was introduced for dlfilter-show-cycles.
>> Probably that approach should be followed for a "cycles" event also.
> 
> Thanks for the review!
> 
> I'm not sure if I understand this entirely. The point of the code here
> is to get sample.cyc_cnt computed, even if we're not sampling IPC.
> I've seen the approx IPC code, but I'm not entirely sure how it
> interacts with this?
> 
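Approx IPC is the itrace "A" option.  Without it, ptq->sample_ipc is set
only where the decoder knows the cycle count is exact for the current
instruction, so samples in between get no cyc_cnt at all.  With "A", the
running counts are used everywhere, so every sample gets a cycle count,
just not necessarily an exact one.  Roughly the gating looks like this
(simplified sketch, not the exact intel-pt.c code):

	if (pt->synth_opts.approx_ipc) {
		/* always provide counts, even where they are not exact */
		ptq->sample_ipc = true;
	} else {
		/* only where the decoder marks the count as exact */
		ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
	}

A "cycles" event has the same choice to make: either emit samples only
where the count is exact, or follow the approx-IPC approach and accept
approximate periods.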
>>>  		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
>>>  	if (sample.cyc_cnt) {
>>>  		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
>>> @@ -1643,8 +1649,30 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
>>>  
>>>  	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
>> There are variables here that are specific to the "instructions" event, so
>> mixing "cycles" with "instructions" means duplicating those, however maybe
>> it would be better not to allow "y" and "i" options at the same time?
> 
> Given that a decode can easily take an hour, it would be really nice to
> be able to keep y and i at the same time :-) (A long-standing pet peeve
> of mine in perf is that you can't show two events side-by-side;
> oprofile did that back in the day, at least on annotations.)
> 
> What specifically do you mean by duplicating? That we need to calculate
> them twice in a way I don't do in my current patch, or something else?
> It feels like I'm missing some context here.
> 
>>> -	return intel_pt_deliver_synth_event(pt, event, &sample,
>>> -					    pt->instructions_sample_type);
>>> +	if (pt->sample_instructions) {
>>> +		err = intel_pt_deliver_synth_event(pt, event, &sample,
>>> +						   pt->instructions_sample_type);
>>> +		if (err)
>>> +			return err;
>>> +	}
>>> +
>>> +	/*
>>> +	 * NOTE: If not doing sampling (e.g. itrace=y0us), we will in practice
>>> +	 * only see cycles being attributed to branches, since CYC packets
>>> +	 * only are emitted together with other packets are emitted.
>>> +	 * We should perhaps consider spreading it out over everything since
>>> +	 * the last CYC packet, ie., since last time sample.cyc_cnt was nonzero.
>>> +	 */
>>> +	if (pt->sample_cycles && sample.cyc_cnt) {
>>> +		sample.id = ptq->pt->cycles_id;
>>> +		sample.stream_id = ptq->pt->cycles_id;
>> A "cycles" sample needs to set the sample period to the number of cycles since the
>> last "cycles" sample.
> 
> OK. If I understand you right, is this as simple as sample.period =
> sample.cyc_cnt?
> 
> /* Steinar */

Perhaps changing it to something like below.  What do you think?


diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 062cd25f7cd7..cec0444277f6 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -215,6 +215,8 @@ struct intel_pt_queue {
 	u64 ipc_cyc_cnt;
 	u64 last_in_insn_cnt;
 	u64 last_in_cyc_cnt;
+	u64 last_cy_insn_cnt;
+	u64 last_cy_cyc_cnt;
 	u64 last_br_insn_cnt;
 	u64 last_br_cyc_cnt;
 	unsigned int cbr_seen;
@@ -1666,12 +1668,11 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 	}
 }
 
-static int intel_pt_synth_instruction_or_cycle_sample(struct intel_pt_queue *ptq)
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 {
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
-	int err;
 
 	if (intel_pt_skip_event(pt))
 		return 0;
@@ -1685,7 +1686,7 @@ static int intel_pt_synth_instruction_or_cycle_sample(struct intel_pt_queue *ptq
 	else
 		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
 
-	if (ptq->sample_ipc || pt->sample_cycles)
+	if (ptq->sample_ipc)
 		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
 	if (sample.cyc_cnt) {
 		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
@@ -1695,30 +1696,40 @@ static int intel_pt_synth_instruction_or_cycle_sample(struct intel_pt_queue *ptq
 
 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
-	if (pt->sample_instructions) {
-		err = intel_pt_deliver_synth_event(pt, event, &sample,
-						   pt->instructions_sample_type);
-		if (err)
-			return err;
-	}
+	return intel_pt_deliver_synth_event(pt, event, &sample,
+					    pt->instructions_sample_type);
+}
 
-	/*
-	 * NOTE: If not doing sampling (e.g. itrace=y0us), we will in practice
-	 * only see cycles being attributed to branches, since CYC packets
-	 * only are emitted together with other packets are emitted.
-	 * We should perhaps consider spreading it out over everything since
-	 * the last CYC packet, ie., since last time sample.cyc_cnt was nonzero.
-	 */
-	if (pt->sample_cycles && sample.cyc_cnt) {
-		sample.id = ptq->pt->cycles_id;
-		sample.stream_id = ptq->pt->cycles_id;
-		err = intel_pt_deliver_synth_event(pt, event, &sample,
-						   pt->cycles_sample_type);
-		if (err)
-			return err;
+static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	u64 period;
+
+	if (pt->synth_opts.quick)
+		period = 1;
+	else
+		period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
+
+	if (!period || intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->cycles_id;
+	sample.stream_id = ptq->pt->cycles_id;
+	sample.period = period;
+
+	if (ptq->sample_ipc)
+		sample.cyc_cnt = sample.period;
+	if (sample.cyc_cnt) {
+		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt;
+		ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
+		ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
 	}
 
-	return 0;
+	return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
 }
 
 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
@@ -2457,11 +2468,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		}
 	}
 
-	if ((pt->sample_instructions || pt->sample_cycles) &&
-	    (state->type & INTEL_PT_INSTRUCTION)) {
-		err = intel_pt_synth_instruction_or_cycle_sample(ptq);
-		if (err)
-			return err;
+	if (state->type & INTEL_PT_INSTRUCTION) {
+		if (pt->sample_instructions) {
+			err = intel_pt_synth_instruction_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (pt->sample_cycles) {
+			err = intel_pt_synth_cycle_sample(ptq);
+			if (err)
+				return err;
+		}
 	}
 
 	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
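
That is also the duplication I meant: last_cy_insn_cnt / last_cy_cyc_cnt
duplicate last_in_insn_cnt / last_in_cyc_cnt, so "cycles" and
"instructions" do not consume each other's deltas and "y" and "i" can be
used at the same time.

For trying it out, assuming the "y" option from your patch, something
along the lines of:

	perf record -e intel_pt//u -- <workload>
	perf script --itrace=yi

should then show "cycles" and "instructions" samples side by side (adding
"A" to the itrace options gives the approximate cycle counts discussed
above).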

