lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c29a4cdc-9419-4f1e-92bc-a75c3b10e37d@intel.com>
Date: Mon, 13 Jan 2025 10:15:16 +0200
From: Adrian Hunter <adrian.hunter@...el.com>
To: Tavian Barnes <tavianator@...ianator.com>,
 linux-perf-users@...r.kernel.org
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>, Mark Rutland <mark.rutland@....com>,
 Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
 Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
 "Liang, Kan" <kan.liang@...ux.intel.com>, Andrew Kreimer
 <algonell@...il.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] perf intel-pt: don't zero the whole perf_sample

On 11/01/25 19:56, Tavian Barnes wrote:
> C designated initializers like
> 
>     struct perf_sample sample = { .ip = 0, };
> 
> set every unmentioned field of the struct to zero.  But since
> sizeof(struct perf_sample) == 1384, this takes a long time.
> 
> struct perf_sample does not need to be fully initialized, and even

Yes, it does need to be fully initialized.  Leaving members
uninitialized in the hope that they never get used adds to
code complexity, e.g. how do you know they are never used,
or that future members never will be used?

> .ip = 0 is unnecessary because intel_pt_prep_*_sample() will initialize
> it.  Skipping the initialization saves about 2.5% of the execution time
> when running
> 
>     $ perf script --itrace=i0
> 
> Signed-off-by: Tavian Barnes <tavianator@...ianator.com>
> ---
>  tools/perf/util/intel-pt.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 30be6dfe09eb..c829398c5bb9 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -1764,7 +1764,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct dummy_branch_stack {
>  		u64			nr;
>  		u64			hw_idx;
> @@ -1835,7 +1835,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  
>  	if (intel_pt_skip_event(pt))
>  		return 0;
> @@ -1867,7 +1867,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	u64 period = 0;
>  
>  	if (ptq->sample_ipc)
> @@ -1894,7 +1894,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  
>  	if (intel_pt_skip_event(pt))
>  		return 0;
> @@ -1927,7 +1927,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_ptwrite raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -1953,7 +1953,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_cbr raw;
>  	u32 flags;
>  
> @@ -1983,7 +1983,7 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_psb raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -2009,7 +2009,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_mwait raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -2034,7 +2034,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_pwre raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -2059,7 +2059,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_exstop raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -2084,7 +2084,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_pwrx raw;
>  
>  	if (intel_pt_skip_event(pt))
> @@ -2235,7 +2235,7 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
>  static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
>  {
>  	const struct intel_pt_blk_items *items = &ptq->state->items;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	union perf_event *event = ptq->event_buf;
>  	struct intel_pt *pt = ptq->pt;
>  	u64 sample_type = evsel->core.attr.sample_type;
> @@ -2407,7 +2407,7 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct {
>  		struct perf_synth_intel_evt cfe;
>  		struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
> @@ -2446,7 +2446,7 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
>  {
>  	struct intel_pt *pt = ptq->pt;
>  	union perf_event *event = ptq->event_buf;
> -	struct perf_sample sample = { .ip = 0, };
> +	struct perf_sample sample;
>  	struct perf_synth_intel_iflag_chg raw;
>  
>  	if (intel_pt_skip_event(pt))


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ