lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zq0jlcPFijZFjyXP@google.com>
Date: Fri, 2 Aug 2024 11:21:09 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Ben Gainey <ben.gainey@....com>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
	james.clark@....com, mark.rutland@....com,
	alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
	irogers@...gle.com, adrian.hunter@...el.com,
	linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v10 1/2] tools/perf: Correctly calculate sample period
 for inherited SAMPLE_READ values

On Thu, Aug 01, 2024 at 01:30:29PM +0100, Ben Gainey wrote:
> Sample period calculation in deliver_sample_value is updated to
> calculate the per-thread period delta for events that are inherit +
> PERF_SAMPLE_READ. When the sampling event has this configuration, the
> read_format.id is used with the tid from the sample to look up the
> storage of the previously accumulated counter total before calculating
> the delta. All existing valid configurations where read_format.value
> represents some global value continue to use just the read_format.id to
> locate the storage of the previously accumulated total.
> 
> perf_sample_id is modified to support tracking per-thread
> values, along with the existing global per-id values. In the
> per-thread case, values are stored in a hash by tid within the
> perf_sample_id, and are dynamically allocated as the number is not known
> ahead of time.
> 
> Signed-off-by: Ben Gainey <ben.gainey@....com>
> ---
>  tools/lib/perf/evsel.c                  | 48 +++++++++++++++++++
>  tools/lib/perf/include/internal/evsel.h | 63 ++++++++++++++++++++++++-
>  tools/perf/util/session.c               | 25 ++++++----
>  3 files changed, 126 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> index c07160953224..abdae2f9498b 100644
> --- a/tools/lib/perf/evsel.c
> +++ b/tools/lib/perf/evsel.c
> @@ -5,6 +5,7 @@
>  #include <perf/evsel.h>
>  #include <perf/cpumap.h>
>  #include <perf/threadmap.h>
> +#include <linux/hash.h>
>  #include <linux/list.h>
>  #include <internal/evsel.h>
>  #include <linux/zalloc.h>
> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
>  		      int idx)
>  {
>  	INIT_LIST_HEAD(&evsel->node);
> +	INIT_LIST_HEAD(&evsel->per_stream_periods);
>  	evsel->attr = *attr;
>  	evsel->idx  = idx;
>  	evsel->leader = evsel;
> @@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
>  
>  void perf_evsel__free_id(struct perf_evsel *evsel)
>  {
> +	struct perf_sample_id_period *pos, *n;
> +
>  	xyarray__delete(evsel->sample_id);
>  	evsel->sample_id = NULL;
>  	zfree(&evsel->id);
>  	evsel->ids = 0;
> +
> +	perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) {
> +		list_del_init(&pos->node);
> +		free(pos);
> +	}
> +}
> +
> +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel)
> +{
> +	return (evsel->attr.sample_type & PERF_SAMPLE_READ)
> +		&& (evsel->attr.sample_type & PERF_SAMPLE_TID)
> +		&& evsel->attr.inherit;

Nitpick: I believe the coding style prefers putting the operators
at the end of the line, like this:

	return (evsel->attr.sample_type & PERF_SAMPLE_READ) &&
		(evsel->attr.sample_type & PERF_SAMPLE_TID) &&
		evsel->attr.inherit;

Thanks,
Namhyung

> +}
> +
> +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread)
> +{
> +	struct hlist_head *head;
> +	struct perf_sample_id_period *res;
> +	int hash;
> +
> +	if (!per_thread)
> +		return &sid->period;
> +
> +	hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS);
> +	head = &sid->periods[hash];
> +
> +	hlist_for_each_entry(res, head, hnode)
> +		if (res->tid == tid)
> +			return &res->period;
> +
> +	if (sid->evsel == NULL)
> +		return NULL;
> +
> +	res = zalloc(sizeof(struct perf_sample_id_period));
> +	if (res == NULL)
> +		return NULL;
> +
> +	INIT_LIST_HEAD(&res->node);
> +	res->tid = tid;
> +
> +	list_add_tail(&res->node, &sid->evsel->per_stream_periods);
> +	hlist_add_head(&res->hnode, &sid->periods[hash]);
> +
> +	return &res->period;
>  }
>  
>  void perf_counts_values__scale(struct perf_counts_values *count,
> diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
> index 5cd220a61962..ea78defa77d0 100644
> --- a/tools/lib/perf/include/internal/evsel.h
> +++ b/tools/lib/perf/include/internal/evsel.h
> @@ -11,6 +11,32 @@
>  struct perf_thread_map;
>  struct xyarray;
>  
> +/**
> + * The per-thread accumulated period storage node.
> + */
> +struct perf_sample_id_period {
> +	struct list_head	node;
> +	struct hlist_node	hnode;
> +	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
> +	u64			period;
> +	/* The TID that the period value belongs to */
> +	u32			tid;
> +};
> +
> +/**
> + * perf_evsel_for_each_per_thread_period_safe - safely iterate through all the
> + * per_stream_periods
> + * @evsel: perf_evsel instance to iterate
> + * @item: struct perf_sample_id_period iterator
> + * @tmp: struct perf_sample_id_period temp iterator
> + */
> +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \
> +	list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node)
> +
> +
> +#define PERF_SAMPLE_ID__HLIST_BITS 4
> +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS)
> +
>  /*
>   * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
>   * more than one entry in the evlist.
> @@ -34,8 +60,32 @@ struct perf_sample_id {
>  	pid_t			 machine_pid;
>  	struct perf_cpu		 vcpu;
>  
> -	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
> -	u64			 period;
> +	/*
> +	 * Per-thread, and global event counts are mutually exclusive:
> +	 * Whilst it is possible to combine events into a group with differing
> +	 * values of PERF_SAMPLE_READ, it is not valid to have inconsistent
> +	 * values for `inherit`. Therefore it is not possible to have a
> +	 * situation where a per-thread event is sampled as a global event;
> +	 * all !inherit groups are global, and all groups where the sampling
> +	 * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event
> +	 * that is part of such a group that is inherit but not PERF_SAMPLE_READ
> +	 * will be read as per-thread. If such an event can also trigger a
> +	 * sample (such as with sample_period > 0) then it will not cause
> +	 * `read_format` to be included in its PERF_RECORD_SAMPLE, and
> +	 * therefore will not expose the per-thread group members as global.
> +	 */
> +	union {
> +		/*
> +		 * Holds total ID period value for PERF_SAMPLE_READ processing
> +		 * (when period is not per-thread).
> +		 */
> +		u64			period;
> +		/*
> +		 * Holds total ID period value for PERF_SAMPLE_READ processing
> +		 * (when period is per-thread).
> +		 */
> +		struct hlist_head	periods[PERF_SAMPLE_ID__HLIST_SIZE];
> +	};
>  };
>  
>  struct perf_evsel {
> @@ -58,6 +108,10 @@ struct perf_evsel {
>  	u32			 ids;
>  	struct perf_evsel	*leader;
>  
> +	/* For events where the read_format value is per-thread rather than
> +	 * global, stores the per-thread cumulative period */
> +	struct list_head	per_stream_periods;
> +
>  	/* parse modifier helper */
>  	int			 nr_members;
>  	/*
> @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
>  int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
>  void perf_evsel__free_id(struct perf_evsel *evsel);
>  
> +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel);
> +
> +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid,
> +					bool per_thread);
> +
>  #endif /* __LIBPERF_INTERNAL_EVSEL_H */
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 5596bed1b8c8..fac0557ff6ea 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1474,18 +1474,24 @@ static int deliver_sample_value(struct evlist *evlist,
>  				union perf_event *event,
>  				struct perf_sample *sample,
>  				struct sample_read_value *v,
> -				struct machine *machine)
> +				struct machine *machine,
> +				bool per_thread)
>  {
>  	struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
>  	struct evsel *evsel;
> +	u64 *storage = NULL;
>  
>  	if (sid) {
> +		storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread);
> +	}
> +
> +	if (storage) {
>  		sample->id     = v->id;
> -		sample->period = v->value - sid->period;
> -		sid->period    = v->value;
> +		sample->period = v->value - *storage;
> +		*storage       = v->value;
>  	}
>  
> -	if (!sid || sid->evsel == NULL) {
> +	if (!storage || sid->evsel == NULL) {
>  		++evlist->stats.nr_unknown_id;
>  		return 0;
>  	}
> @@ -1506,14 +1512,15 @@ static int deliver_sample_group(struct evlist *evlist,
>  				union  perf_event *event,
>  				struct perf_sample *sample,
>  				struct machine *machine,
> -				u64 read_format)
> +				u64 read_format,
> +				bool per_thread)
>  {
>  	int ret = -EINVAL;
>  	struct sample_read_value *v = sample->read.group.values;
>  
>  	sample_read_group__for_each(v, sample->read.group.nr, read_format) {
>  		ret = deliver_sample_value(evlist, tool, event, sample, v,
> -					   machine);
> +					   machine, per_thread);
>  		if (ret)
>  			break;
>  	}
> @@ -1528,6 +1535,7 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
>  	/* We know evsel != NULL. */
>  	u64 sample_type = evsel->core.attr.sample_type;
>  	u64 read_format = evsel->core.attr.read_format;
> +	bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core);
>  
>  	/* Standard sample delivery. */
>  	if (!(sample_type & PERF_SAMPLE_READ))
> @@ -1536,10 +1544,11 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
>  	/* For PERF_SAMPLE_READ we have either single or group mode. */
>  	if (read_format & PERF_FORMAT_GROUP)
>  		return deliver_sample_group(evlist, tool, event, sample,
> -					    machine, read_format);
> +					    machine, read_format, per_thread);
>  	else
>  		return deliver_sample_value(evlist, tool, event, sample,
> -					    &sample->read.one, machine);
> +					    &sample->read.one, machine,
> +					    per_thread);
>  }
>  
>  static int machines__deliver_event(struct machines *machines,
> -- 
> 2.45.2
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ