lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4c095bcf-8877-dad6-e291-48e5d025b09c@intel.com>
Date:   Thu, 29 Jun 2017 22:56:25 +0300
From:   Adrian Hunter <adrian.hunter@...el.com>
To:     Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     Andi Kleen <ak@...ux.intel.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH V2 25/37] perf script: Add synthesized Intel PT power and
 ptwrite events

On 06/28/2017 11:26 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Jun 28, 2017 at 08:21:37PM +0000, Hunter, Adrian escreveu:
>> Sorry for the top-post...
>>
>> Yeah, I've now mixed up the variable attribute:
>>
>> 	https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#Common-Variable-Attributes
>>  
>> with the type attribute:
>>
>> 	https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#Common-Type-Attributes
>>
>> Late here, so maybe it will make more sense tomorrow.
> 
> Right, and I've not been able to focus on this, but I think the problem
> is with packed mixed with unnamed unions :-\

Another possibility is to avoid packed altogether, e.g.:

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ea8534dd44b6..83cdc0a61fd6 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1216,10 +1216,10 @@ static void print_sample_pt_spacing(int len)
 
 static void print_sample_synth_ptwrite(struct perf_sample *sample)
 {
-	struct perf_synth_intel_ptwrite *data = sample->raw_data;
+	struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	len = printf(" IP: %u payload: %#" PRIx64 " ",
@@ -1229,10 +1229,10 @@ static void print_sample_synth_ptwrite(struct perf_sample *sample)
 
 static void print_sample_synth_mwait(struct perf_sample *sample)
 {
-	struct perf_synth_intel_mwait *data = sample->raw_data;
+	struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	len = printf(" hints: %#x extensions: %#x ",
@@ -1242,10 +1242,10 @@ static void print_sample_synth_mwait(struct perf_sample *sample)
 
 static void print_sample_synth_pwre(struct perf_sample *sample)
 {
-	struct perf_synth_intel_pwre *data = sample->raw_data;
+	struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	len = printf(" hw: %u cstate: %u sub-cstate: %u ",
@@ -1255,10 +1255,10 @@ static void print_sample_synth_pwre(struct perf_sample *sample)
 
 static void print_sample_synth_exstop(struct perf_sample *sample)
 {
-	struct perf_synth_intel_exstop *data = sample->raw_data;
+	struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	len = printf(" IP: %u ", data->ip);
@@ -1267,10 +1267,10 @@ static void print_sample_synth_exstop(struct perf_sample *sample)
 
 static void print_sample_synth_pwrx(struct perf_sample *sample)
 {
-	struct perf_synth_intel_pwrx *data = sample->raw_data;
+	struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ",
@@ -1281,11 +1281,11 @@ static void print_sample_synth_pwrx(struct perf_sample *sample)
 
 static void print_sample_synth_cbr(struct perf_sample *sample)
 {
-	struct perf_synth_intel_cbr *data = sample->raw_data;
+	struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
 	unsigned int percent, freq;
 	int len;
 
-	if (sample->raw_size < sizeof(*data))
+	if (perf_sample__bad_synth_size(sample, *data))
 		return;
 
 	freq = (le32_to_cpu(data->freq) + 500) / 1000;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index d93a6825ce09..9967c87af7a6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -266,11 +266,16 @@ enum perf_synth_id {
 };
 
 /*
- * Raw data formats for synthesized events. Note that raw data plus the raw data
- * size (4 bytes) must align to 8-bytes.
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data().  It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
  */
 
 struct perf_synth_intel_ptwrite {
+	u32 padding;
 	union {
 		struct {
 			u32	ip		:  1,
@@ -279,9 +284,10 @@ struct perf_synth_intel_ptwrite {
 		u32	flags;
 	};
 	u64	payload;
-} __packed;
+};
 
 struct perf_synth_intel_mwait {
+	u32 padding;
 	u32 reserved;
 	union {
 		struct {
@@ -292,9 +298,10 @@ struct perf_synth_intel_mwait {
 		};
 		u64	payload;
 	};
-} __packed;
+};
 
 struct perf_synth_intel_pwre {
+	u32 padding;
 	u32 reserved;
 	union {
 		struct {
@@ -306,9 +313,10 @@ struct perf_synth_intel_pwre {
 		};
 		u64	payload;
 	};
-} __packed;
+};
 
 struct perf_synth_intel_exstop {
+	u32 padding;
 	union {
 		struct {
 			u32	ip		:  1,
@@ -319,6 +327,7 @@ struct perf_synth_intel_exstop {
 };
 
 struct perf_synth_intel_pwrx {
+	u32 padding;
 	u32 reserved;
 	union {
 		struct {
@@ -329,9 +338,10 @@ struct perf_synth_intel_pwrx {
 		};
 		u64	payload;
 	};
-} __packed;
+};
 
 struct perf_synth_intel_cbr {
+	u32 padding;
 	union {
 		struct {
 			u32	cbr		:  8,
@@ -346,6 +356,24 @@ struct perf_synth_intel_cbr {
 };
 
 /*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+	return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
+/*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
  * such "chunks" of lost events is stored in .nr_events[PERF_EVENT_LOST] while
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 754e92ee6c3e..b58f9fd1e2ee 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1283,8 +1283,8 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
 	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
 	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->ptwrites_sample_type);
@@ -1311,8 +1311,8 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
 	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
 	raw.reserved3 = 0;
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->pwr_events_sample_type);
@@ -1336,8 +1336,8 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
 	raw.reserved = 0;
 	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->pwr_events_sample_type);
@@ -1361,8 +1361,8 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
 	raw.reserved = 0;
 	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->pwr_events_sample_type);
@@ -1386,8 +1386,8 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
 	raw.flags = 0;
 	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->pwr_events_sample_type);
@@ -1411,8 +1411,8 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
 	raw.reserved = 0;
 	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
 
-	sample.raw_size = sizeof(raw);
-	sample.raw_data = &raw;
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
 
 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
 					    pt->pwr_events_sample_type);



>  
>> -----Original Message-----
>> From: Arnaldo Carvalho de Melo [mailto:acme@...nel.org] 
>> Sent: Wednesday, June 28, 2017 9:54 PM
>> To: Hunter, Adrian <adrian.hunter@...el.com>
>> Cc: Andi Kleen <ak@...ux.intel.com>; linux-kernel@...r.kernel.org
>> Subject: Re: [PATCH V2 25/37] perf script: Add synthesized Intel PT power and ptwrite events
>>
>> Em Wed, Jun 28, 2017 at 08:40:25PM +0300, Adrian Hunter escreveu:
>>> On 06/28/2017 04:04 PM, Arnaldo Carvalho de Melo wrote:
>>>> Em Fri, May 26, 2017 at 11:17:26AM +0300, Adrian Hunter escreveu:
>>>>> Add definitions for synthesized Intel PT events for power and ptwrite.
>>>>  
>>>>> +++ b/tools/perf/util/event.h
>>>>> +/*
>>>>> + * Raw data formats for synthesized events. Note that raw data plus the raw data
>>>>> + * size (4 bytes) must align to 8-bytes.
>>>>> + */
>>>>> +
>>>>> +struct perf_synth_intel_ptwrite {
>>>>> +	union {
>>>>> +		struct {
>>>>> +			u32	ip		:  1,
>>>>> +				reserved	: 31;
>>>>> +		};
>>>>> +		u32	flags;
>>>>> +	};
>>>>> +	u64	payload;
>>>>> +} __packed;
>>>>
>>>>
>>>> some versions of clang and gcc dislike this __packed here:
>>>>
>>>> In file included from builtin-script.c:5:
>>>> In file included from /git/linux/tools/perf/util/debug.h:8:
>>>> /git/linux/tools/perf/util/event.h:274:2: error: packed attribute is unnecessary for (null) [-Werror,-Wpacked]
>>>>         union {
>>>>         ^
>>>> /git/linux/tools/perf/util/event.h:285:6: error: packed attribute is unnecessary for 'reserved' [-Werror,-Wpacked]
>>>>         u32 reserved;
>>>>             ^
>>>> /git/linux/tools/perf/util/event.h:298:6: error: packed attribute is unnecessary for 'reserved' [-Werror,-Wpacked]
>>>>         u32 reserved;
>>>>             ^
>>>> /git/linux/tools/perf/util/event.h:322:6: error: packed attribute is unnecessary for 'reserved' [-Werror,-Wpacked]
>>>>         u32 reserved;
>>>>             ^
>>>> 4 errors generated.
>>>> mv: can't rename '/tmp/build/perf/.builtin-script.o.tmp': No such file or directory
>>>>
>>>> /git/linux/tools/build/Makefile.build:101: recipe for target '/tmp/build/perf/builtin-script.o' failed
>>>>
>>>> Failing in various distros:
>>>>
>>>> [root@...et ~]# waitp 3940 ; time dm
>>>>    1 92.3684147260 alpine:3.4: FAIL
>>>>    2 95.9136365930 alpine:3.5: FAIL
>>>>    3 104.8328303770 alpine:3.6: FAIL
>>>>    4 121.6584964930 alpine:edge: FAIL
>>>>    5 37.2536373490 android-ndk:r12b-arm: Ok
>>>>    6 83.9077612370 archlinux:latest: Ok
>>>>    7 14.7094639200 centos:5: FAIL
>>>>    8 16.6371634320 centos:6: FAIL
>>>>
>>>> Investigating...
>>>
>>> Re-reading the documentation for __packed, it seems like the following 
>>> might be better:
>>
>> Humm, can you provide the URL for such docs? I always saw packed as an attribute for a struct, not for a member... For members "aligned" is what I'm used to seeing:
>>
>>    __attribute__ ((aligned (8)))
>>
>> In the kernel sources there are a few such cases as you suggest:
>>
>> [acme@...et linux]$ find include/ -name "*.h"| xargs grep -w __packed | grep -v } | grep -v "struct __packed" | wc -l
>> 12
>> [acme@...et linux]$
>>
>> But most are the other way, i.e. tagging the packed attribute to the whole struct, as you originally did :-\
>>
>> - Arnaldo
>>  
>>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
>>> index c283603f59c7..a7547cb3b760 100644
>>> --- a/tools/perf/util/event.h
>>> +++ b/tools/perf/util/event.h
>>> @@ -278,8 +278,8 @@ struct perf_synth_intel_ptwrite {
>>>  		};
>>>  		u32	flags;
>>>  	};
>>> -	u64	payload;
>>> -} __packed;
>>> +	u64	payload __packed;
>>> +};
>>>  
>>>  struct perf_synth_intel_mwait {
>>>  	u32 reserved;
>>> @@ -291,8 +291,8 @@ struct perf_synth_intel_mwait {
>>>  				reserved2	: 30;
>>>  		};
>>>  		u64	payload;
>>> -	};
>>> -} __packed;
>>> +	} __packed;
>>> +};
>>>  
>>>  struct perf_synth_intel_pwre {
>>>  	u32 reserved;
>>> @@ -305,8 +305,8 @@ struct perf_synth_intel_pwre {
>>>  				reserved2	: 48;
>>>  		};
>>>  		u64	payload;
>>> -	};
>>> -} __packed;
>>> +	} __packed;
>>> +};
>>>  
>>>  struct perf_synth_intel_exstop {
>>>  	union {
>>> @@ -328,8 +328,8 @@ struct perf_synth_intel_pwrx {
>>>  				reserved1	: 52;
>>>  		};
>>>  		u64	payload;
>>> -	};
>>> -} __packed;
>>> +	} __packed;
>>> +};
>>>  
>>>  struct perf_synth_intel_cbr {
>>>  	union {
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ