linux-kernel - Re: [PATCH V9 8/8] perf tools: handle PERF_RECORD_LOST

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150511192225.GC5456@kernel.org>
Date:	Mon, 11 May 2015 16:22:25 -0300
From:	Arnaldo Carvalho de Melo <acme@...radead.org>
To:	Kan Liang <kan.liang@...el.com>
Cc:	peterz@...radead.org, mingo@...nel.org, eranian@...gle.com,
	andi@...stfloor.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH V9 8/8] perf tools: handle PERF_RECORD_LOST_SAMPLES

Em Sun, May 10, 2015 at 03:13:15PM -0400, Kan Liang escreveu:
> From: Kan Liang <kan.liang@...el.com>
> 
> This patch modified the perf tool to handle the new RECORD type,
> PERF_RECORD_LOST_SAMPLES.
> The number of lost-sample events is stored in
> .nr_events[PERF_RECORD_LOST_SAMPLES]. While the exact number of samples
> which the kernel dropped is stored in total_lost_samples.
> When the percentage of dropped samples is greater than 5%, a warning
> will be sent out.
> 
> Here are some examples:
> 
> Eg 1, Recording different frequently-occurring events is safe with the
>       patch. Only a very low drop rate is associated with such actions.
> 
> $ perf record -e '{cycles:p,instructions:p}' -c 20003 --no-time ~/tchain
> ~/tchain
> [perf record: Woken up 148 times to write data]
> [perf record: Captured and wrote 36.922 MB perf.data (1206322 samples)]
> 
> $ perf report -D | tail
>           SAMPLE events:     120243
>            MMAP2 events:          5
>     LOST_SAMPLES events:         24
>   FINISHED_ROUND events:         15
> cycles:p stats:
>            TOTAL events:      59348
>           SAMPLE events:      59348
> instructions:p stats:
>            TOTAL events:      60895
>           SAMPLE events:      60895

The example doesn't show which of cycles:p or instructions:p got lost,
isn't that possible? Guess not from the patch, but should, no? I.e. what
is PERF_SAMPLE_ID for then?

- Arnaldo

> 
> $ perf report --stdio --group
>  # To display the perf.data header info, please use
>  --header/--header-only options.
>  #
>  #
>  # Total Lost Samples: 24
>  #
>  # Samples: 120K of event 'anon group { cycles:p, instructions:p }'
>  # Event count (approx.): 24048600000
>  #
>  #         Overhead  Command      Shared Object     Symbol
>  # ................  ...........  ................
>  ..................................
>  #
>     99.74%  99.86%  tchain_edit  tchain_edit       [.] f3
>      0.09%   0.02%  tchain_edit  tchain_edit       [.] f2
>      0.04%   0.00%  tchain_edit  [kernel.vmlinux]  [k] ixgbe_read_reg
> 
> Eg 2, Recording the same thing multiple times can lead to high drop
>       rate, but it is not a useful configuration.
> 
> $ perf record -e '{cycles:p,cycles:p}' -c 20003 --no-time ~/tchain
> [perf record: Woken up 1 times to write data]
> Warning:
> Processed 600592 samples and lost 99.73% samples!
> [perf record: Captured and wrote 0.121 MB perf.data (1629 samples)]
> 
> Signed-off-by: Kan Liang <kan.liang@...el.com>
> ---
>  tools/perf/builtin-report.c |  1 +
>  tools/perf/util/event.c     |  9 +++++++++
>  tools/perf/util/event.h     | 17 +++++++++++++++++
>  tools/perf/util/machine.c   | 10 ++++++++++
>  tools/perf/util/machine.h   |  2 ++
>  tools/perf/util/session.c   | 19 +++++++++++++++++++
>  tools/perf/util/tool.h      |  1 +
>  7 files changed, 59 insertions(+)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 7c73ae5..485b7e9 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -318,6 +318,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
>  {
>  	struct perf_evsel *pos;
>  
> +	fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples);
>  	evlist__for_each(evlist, pos) {
>  		struct hists *hists = evsel__hists(pos);
>  		const char *evname = perf_evsel__name(pos);
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index db52609..2daadc8 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -25,6 +25,7 @@ static const char *perf_event__names[] = {
>  	[PERF_RECORD_SAMPLE]			= "SAMPLE",
>  	[PERF_RECORD_AUX]			= "AUX",
>  	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
> +	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
>  	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
>  	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
> @@ -713,6 +714,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
>  	return machine__process_itrace_start_event(machine, event);
>  }
>  
> +int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
> +				     union perf_event *event,
> +				     struct perf_sample *sample,
> +				     struct machine *machine)
> +{
> +	return machine__process_lost_samples_event(machine, event, sample);
> +}
> +
>  size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
>  {
>  	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> index 7eecd5e..e02996a 100644
> --- a/tools/perf/util/event.h
> +++ b/tools/perf/util/event.h
> @@ -52,6 +52,11 @@ struct lost_event {
>  	u64 lost;
>  };
>  
> +struct lost_samples_event {
> +	struct perf_event_header header;
> +	u64 lost;
> +};
> +
>  /*
>   * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
>   */
> @@ -235,6 +240,12 @@ enum auxtrace_error_type {
>   * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
>   * the sum of all struct lost_event.lost fields reported.
>   *
> + * The kernel discards mixed up samples and sends the number in a
> + * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
> + * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
> + * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
> + * all struct lost_samples_event.lost fields reported.
> + *
>   * The total_period is needed because by default auto-freq is used, so
>   * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
>   * the total number of low level events, it is necessary to to sum all struct
> @@ -244,6 +255,7 @@ struct events_stats {
>  	u64 total_period;
>  	u64 total_non_filtered_period;
>  	u64 total_lost;
> +	u64 total_lost_samples;
>  	u64 total_invalid_chains;
>  	u32 nr_events[PERF_RECORD_HEADER_MAX];
>  	u32 nr_non_filtered_samples;
> @@ -342,6 +354,7 @@ union perf_event {
>  	struct comm_event		comm;
>  	struct fork_event		fork;
>  	struct lost_event		lost;
> +	struct lost_samples_event	lost_samples;
>  	struct read_event		read;
>  	struct throttle_event		throttle;
>  	struct sample_event		sample;
> @@ -390,6 +403,10 @@ int perf_event__process_lost(struct perf_tool *tool,
>  			     union perf_event *event,
>  			     struct perf_sample *sample,
>  			     struct machine *machine);
> +int perf_event__process_lost_samples(struct perf_tool *tool,
> +				     union perf_event *event,
> +				     struct perf_sample *sample,
> +				     struct machine *machine);
>  int perf_event__process_aux(struct perf_tool *tool,
>  			    union perf_event *event,
>  			    struct perf_sample *sample,
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 2f47110..991a342 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -458,6 +458,14 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
>  	return 0;
>  }
>  
> +int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
> +					union perf_event *event, struct perf_sample *sample)
> +{
> +	dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
> +		    sample->id, event->lost_samples.lost);
> +	return 0;
> +}
> +
>  static struct dso*
>  machine__module_dso(struct machine *machine, struct kmod_path *m,
>  		    const char *filename)
> @@ -1351,6 +1359,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
>  		ret = machine__process_aux_event(machine, event); break;
>  	case PERF_RECORD_ITRACE_START:
>  		ret = machine__process_itrace_start_event(machine, event);
> +	case PERF_RECORD_LOST_SAMPLES:
> +		ret = machine__process_lost_samples_event(machine, event, sample); break;
>  		break;
>  	default:
>  		ret = -1;
> diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
> index 1d99296..7ba5e8f 100644
> --- a/tools/perf/util/machine.h
> +++ b/tools/perf/util/machine.h
> @@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
>  				struct perf_sample *sample);
>  int machine__process_lost_event(struct machine *machine, union perf_event *event,
>  				struct perf_sample *sample);
> +int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
> +					struct perf_sample *sample);
>  int machine__process_aux_event(struct machine *machine,
>  			       union perf_event *event);
>  int machine__process_itrace_start_event(struct machine *machine,
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index e722107..4a5a609 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
>  		tool->exit = process_event_stub;
>  	if (tool->lost == NULL)
>  		tool->lost = perf_event__process_lost;
> +	if (tool->lost_samples == NULL)
> +		tool->lost_samples = perf_event__process_lost_samples;
>  	if (tool->aux == NULL)
>  		tool->aux = perf_event__process_aux;
>  	if (tool->itrace_start == NULL)
> @@ -606,6 +608,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
>  	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
>  	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
>  	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
> +	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
>  	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
>  	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
>  	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
> @@ -1049,6 +1052,10 @@ static int machines__deliver_event(struct machines *machines,
>  		if (tool->lost == perf_event__process_lost)
>  			evlist->stats.total_lost += event->lost.lost;
>  		return tool->lost(tool, event, sample, machine);
> +	case PERF_RECORD_LOST_SAMPLES:
> +		if (tool->lost_samples == perf_event__process_lost_samples)
> +			evlist->stats.total_lost_samples += event->lost_samples.lost;
> +		return tool->lost_samples(tool, event, sample, machine);
>  	case PERF_RECORD_READ:
>  		return tool->read(tool, event, sample, evsel, machine);
>  	case PERF_RECORD_THROTTLE:
> @@ -1286,6 +1293,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
>  			    stats->nr_events[PERF_RECORD_LOST]);
>  	}
>  
> +	if (session->tool->lost_samples == perf_event__process_lost_samples) {
> +		double drop_rate;
> +
> +		drop_rate = (double)stats->total_lost_samples /
> +			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
> +		if (drop_rate > 0.05) {
> +			ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n",
> +				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
> +				    drop_rate * 100.0);
> +		}
> +	}
> +
>  	if (stats->nr_unknown_events != 0) {
>  		ui__warning("Found %u unknown events!\n\n"
>  			    "Is this an older tool processing a perf.data "
> diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> index 7f282ad..c307dd4 100644
> --- a/tools/perf/util/tool.h
> +++ b/tools/perf/util/tool.h
> @@ -43,6 +43,7 @@ struct perf_tool {
>  			fork,
>  			exit,
>  			lost,
> +			lost_samples,
>  			aux,
>  			itrace_start,
>  			throttle,
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/