linux-kernel - Re: [PATCH v3 2/2] perf/sdt: Directly record SDT events with 'perf record'

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170228144552.d4f5c707169d7526acf8a37f@kernel.org>
Date:   Tue, 28 Feb 2017 14:45:52 +0900
From:   Masami Hiramatsu <mhiramat@...nel.org>
To:     Ravi Bangoria <ravi.bangoria@...ux.vnet.ibm.com>
Cc:     mingo@...hat.com, acme@...nel.org, masami.hiramatsu.pt@...achi.com,
        brendan.d.gregg@...il.com, peterz@...radead.org,
        alexander.shishkin@...ux.intel.com, wangnan0@...wei.com,
        jolsa@...nel.org, ak@...ux.intel.com, treeze.taeung@...il.com,
        mathieu.poirier@...aro.org, hekuang@...wei.com,
        sukadev@...ux.vnet.ibm.com, ananth@...ibm.com,
        naveen.n.rao@...ux.vnet.ibm.com, adrian.hunter@...el.com,
        linux-kernel@...r.kernel.org, hemant@...ux.vnet.ibm.com
Subject: Re: [PATCH v3 2/2] perf/sdt: Directly record SDT events with 'perf
 record'

On Fri, 24 Feb 2017 13:13:25 +0530
Ravi Bangoria <ravi.bangoria@...ux.vnet.ibm.com> wrote:

> From: Hemant Kumar <hemant@...ux.vnet.ibm.com>
> 
> Add support for directly recording SDT events which are present in
> the probe cache. Without this patch, we could probe into SDT events
> using 'perf probe' and 'perf record'. With this patch, we can probe
> the SDT events directly using 'perf record'.
> 
> For example :
> 
>   $ perf list sdt
>     sdt_libpthread:mutex_entry                         [SDT event]
>     sdt_libc:setjmp                                    [SDT event]
>     ...
> 
>   $ perf record -a -e sdt_libc:setjmp
>     ^C[ perf record: Woken up 1 times to write data ]
>     [ perf record: Captured and wrote 0.286 MB perf.data (1065 samples) ]
> 
>   $ perf script
>          bash   803 [002]  6492.190311: sdt_libc:setjmp: (7f1d503b56a1)
>         login   488 [001]  6496.791583: sdt_libc:setjmp: (7ff3013d56a1)
>       fprintd 11038 [003]  6496.808032: sdt_libc:setjmp: (7fdedf5936a1)
> 
> Recording on SDT events with same provider and marker names is also
> supported:
> 
>   $ readelf -n /usr/lib64/libpthread-2.24.so | grep -A2 Provider
>       Provider: libpthread
>       Name: mutex_entry
>       Location: 0x0000000000009ddb, Base: 0x00000000000139cc, ...
>     --
>       Provider: libpthread
>       Name: mutex_entry
>       Location: 0x000000000000bcbb, Base: 0x00000000000139cc, ...
> 
>   $ perf record -a -e sdt_libpthread:mutex_entry
>     Info: Recording on 2 occurrences of sdt_libpthread:mutex_entry
>     ^C[ perf record: Woken up 1 times to write data ]
>     [ perf record: Captured and wrote 0.197 MB perf.data (31 samples) ]
> 
>   $ perf script
>       in:imjournal 442 [000]  6625.701833:   sdt_libpthread:mutex_entry: (7fb1a1940ddb)
>      rs:main Q:Reg 443 [001]  6625.701889: sdt_libpthread:mutex_entry_1: (7fb1a1942cbb)
> 
> 
> After invoking 'perf record', behind the scenes, it checks whether
> the event specified is an SDT event by using the flag '%' or string
> 'sdt_'. After that, it first checks whether event already exists
> with that *name* in uprobe_events file. If found, it records that
> particular event. Otherwise, it does a lookup of the probe cache to
> find out the SDT event. If its not present, it throws an error.
> If found, it again tries to find existing events from uprobe_events
> file, but this time it uses *address* and *filename* for comparison.
> Finally it writes new events into the uprobe_events file and starts
> recording. It also maintains a list of the event names that were
> written to uprobe_events file for this session. After finishing the
> record session, it removes the events from the uprobe_events file
> using the maintained name list.

OK, the behavior looks good. However, the implementation seems
too complex, could you make it simpler?
I have some comments on the code, see below.

[...]
> diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
> index 1fcebc3..d255a6d 100644
> --- a/tools/perf/builtin-probe.c
> +++ b/tools/perf/builtin-probe.c
> @@ -45,6 +45,7 @@
>  #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
>  #define DEFAULT_FUNC_FILTER "!_*"
>  #define DEFAULT_LIST_FILTER "*"
> +#define MAX_EVENT_LENGTH 512
>  
>  /* Session management structure */
>  static struct {
> @@ -312,6 +313,19 @@ static void pr_err_with_code(const char *msg, int err)
>  	pr_err("\n");
>  }
>  
> +/* Show how to use the event. */
> +static void record_syntax_hint(const char *group, const char *event)
> +{
> +	char ge[MAX_EVENT_LENGTH];
> +
> +	pr_info("\nYou can now use it in all perf tools, such as:\n\n");
> +	pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
> +
> +	snprintf(ge, strlen(group) + strlen(event) + 2, "%s:%s", group, event);
> +	if (is_sdt_event(ge))
> +		pr_info("Hint: SDT event can be directly recorded with 'perf record'. No need to create probe manually.\n");
> +}

Here, we don't need this message because anyway perf_add_probe_events already
created this probe...

> +
>  static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
>  {
>  	int ret;
> @@ -356,11 +370,8 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
>  	}
>  
>  	/* Note that it is possible to skip all events because of blacklist */
> -	if (event) {
> -		/* Show how to use the event. */
> -		pr_info("\nYou can now use it in all perf tools, such as:\n\n");
> -		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
> -	}
> +	if (event)
> +		record_syntax_hint(group, event);
>  
>  out_cleanup:
>  	cleanup_perf_probe_events(pevs, npevs);
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index bc84a37..fa9b345 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -39,6 +39,7 @@
>  #include "util/trigger.h"
>  #include "util/perf-hooks.h"
>  #include "asm/bug.h"
> +#include "util/probe-file.h"
>  
>  #include <unistd.h>
>  #include <sched.h>
> @@ -73,6 +74,7 @@ struct record {
>  	bool			timestamp_filename;
>  	struct switch_output	switch_output;
>  	unsigned long long	samples;
> +	struct list_head	sdt_event_list;
>  };
>  
>  static volatile int auxtrace_record__snapshot_started;
> @@ -1503,6 +1505,25 @@ static struct record record = {
>  	},
>  };
>  
> +void sdt_event_list__add(struct list_head *sdt_event_list)
> +{
> +	if (list_empty(sdt_event_list))
> +		return;
> +	list_splice(sdt_event_list, &record.sdt_event_list);
> +}
> +
> +bool is_cmd_record(void)
> +{
> +	return (record.evlist != NULL);
> +}
> +
> +void sdt_event_list__remove(void)
> +{
> +#ifdef HAVE_LIBELF_SUPPORT
> +	remove_sdt_event_list(&record.sdt_event_list);
> +#endif
> +}
> +
>  const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
>  	"\n\t\t\t\tDefault: fp";
>  
> @@ -1671,6 +1692,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
>  	if (rec->evlist == NULL)
>  		return -ENOMEM;
>  
> +	INIT_LIST_HEAD(&rec->sdt_event_list);
>  	err = perf_config(perf_record_config, rec);
>  	if (err)
>  		return err;
> @@ -1841,6 +1863,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
>  	perf_evlist__delete(rec->evlist);
>  	symbol__exit();
>  	auxtrace_record__free(rec->itr);
> +	sdt_event_list__remove();
>  	return err;
>  }
>  
> diff --git a/tools/perf/perf.h b/tools/perf/perf.h
> index 1c27d94..4ad12bb 100644
> --- a/tools/perf/perf.h
> +++ b/tools/perf/perf.h
> @@ -76,4 +76,6 @@ struct record_opts {
>  struct option;
>  extern const char * const *record_usage;
>  extern struct option *record_options;
> +
> +bool is_cmd_record(void);
>  #endif
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 67a8aeb..7d1f042 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -1726,12 +1726,66 @@ static void parse_events_print_error(struct parse_events_error *err,
>  
>  #undef MAX_WIDTH
>  
> +/* SDT event needs LIBELF support for creating a probe point */
> +#ifdef HAVE_LIBELF_SUPPORT
> +static int parse_sdt_event(struct perf_evlist *evlist, const char *str,
> +			   struct parse_events_error *err)
> +{
> +	char *ptr = NULL;
> +	int ret;
> +	struct list_head *sdt_evlist;
> +	struct sdt_event_list *event;
> +
> +	if (str[0] == '%')
> +		str++;
> +
> +	ptr = strdup(str);
> +	if (ptr == NULL)
> +		return -ENOMEM;
> +
> +	sdt_evlist = zalloc(sizeof(*sdt_evlist));
> +	if (!sdt_evlist) {
> +		free(ptr);
> +		pr_err("Error in sdt_evlist memory allocation\n");

Could you make this message pr_debug() and/or use the 'err' ?

> +		return -ENOMEM;
> +	}
> +	INIT_LIST_HEAD(sdt_evlist);
> +
> +	/*
> +	 * If there is an error in this call, no need to free
> +	 * up sdt_evlist, its already free'ed up in the previous
> +	 * call. Free up 'ptr' though.
> +	 */
> +	ret = add_sdt_event(ptr, sdt_evlist, err);
> +	if (!ret) {
> +		list_for_each_entry(event, sdt_evlist, list) {
> +			ret = parse_events(evlist, event->event_info, err);
> +			if (ret < 0)
> +				goto ret;
> +		}
> +		/* Add it to the record struct */
> +		sdt_event_list__add(sdt_evlist);
> +	}
> +
> +ret:
> +	free(ptr);
> +	return ret;
> +}
> +#endif /* HAVE_LIBELF_SUPPORT */
> +
>  int parse_events_option(const struct option *opt, const char *str,
>  			int unset __maybe_unused)
>  {
>  	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
>  	struct parse_events_error err = { .idx = 0, };
> -	int ret = parse_events(evlist, str, &err);
> +	int ret = 0;
> +
> +#ifdef HAVE_LIBELF_SUPPORT
> +	if (is_sdt_event((char *)str) && is_cmd_record())
> +		ret = parse_sdt_event(evlist, str, &err);
> +	else
> +#endif
> +		ret = parse_events(evlist, str, &err);
>  
>  	if (ret)
>  		parse_events_print_error(&err, str);
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index 1af6a26..205f682 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -196,4 +196,6 @@ int is_valid_tracepoint(const char *event_string);
>  int valid_event_mount(const char *eventfs);
>  char *parse_events_formats_error_string(char *additional_terms);
>  
> +void sdt_event_list__add(struct list_head *sdt_event_list);
> +void sdt_event_list__remove(void);
>  #endif /* __PERF_PARSE_EVENTS_H */
> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
> index 2b1409f..a19b00d 100644
> --- a/tools/perf/util/probe-event.c
> +++ b/tools/perf/util/probe-event.c
> @@ -1293,7 +1293,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
>  	return err;
>  }
>  
> -static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
> +int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
>  {
>  	char *ptr;
>  
> @@ -3166,6 +3166,12 @@ static int find_cached_events_all(struct perf_probe_event *pev,
>  	return ret;
>  }
>  
> +int find_sdt_events_from_cache(struct perf_probe_event *pev,
> +			       struct probe_trace_event **tevs)
> +{
> +	return find_cached_events_all(pev, tevs);
> +}

Would we really need this wrapper? (or just rename find_sdt_events_all and expose it)

> +
>  static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
>  					      struct probe_trace_event **tevs)
>  {
> @@ -3476,3 +3482,39 @@ int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
>  		tvar->name = NULL;
>  	return 0;
>  }
> +
> +/*
> + * Record session for SDT events has ended. Delete the SDT events
> + * from uprobe_events file that were created initially.
> + */
> +void remove_sdt_event_list(struct list_head *sdt_events)
> +{
> +	struct sdt_event_list *event;
> +	struct strfilter *filter = NULL;
> +	const char *err = NULL;
> +	int ret = 0;
> +
> +	if (list_empty(sdt_events))
> +		return;
> +
> +	list_for_each_entry(event, sdt_events, list) {
> +		if (event->existing_event)
> +			continue;
> +		if (!filter) {
> +			filter = strfilter__new(event->event_info, &err);
> +			if (!filter)
> +				goto free_list;
> +		} else {
> +			ret = strfilter__or(filter, event->event_info, &err);
> +		}
> +	}
> +
> +	if (filter)
> +		ret = del_perf_probe_events(filter);
> +
> +	if (ret)
> +		pr_err("Error in deleting the SDT list\n");

I think we don't have to care about errors while removing SDT events,
since we have no exclusive lock on that, someone can use it in parallel.
We can just ignore it.

> +
> +free_list:
> +	free_sdt_list(sdt_events);
> +}
> diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
> index 5d4e940..5ec648e 100644
> --- a/tools/perf/util/probe-event.h
> +++ b/tools/perf/util/probe-event.h
> @@ -182,4 +182,8 @@ struct map *get_target_map(const char *target, bool user);
>  void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
>  					   int ntevs);
>  
> +int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev);
> +
> +int find_sdt_events_from_cache(struct perf_probe_event *pev,
> +			       struct probe_trace_event **tevs);
>  #endif /*_PROBE_EVENT_H */
> diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
> index 436b647..c941f38 100644
> --- a/tools/perf/util/probe-file.c
> +++ b/tools/perf/util/probe-file.c
> @@ -27,8 +27,10 @@
>  #include "probe-event.h"
>  #include "probe-file.h"
>  #include "session.h"
> +#include "probe-finder.h"
>  
>  #define MAX_CMDLEN 256
> +#define MAX_EVENT_LENGTH 512
>  
>  static void print_open_warning(int err, bool uprobe)
>  {
> @@ -933,3 +935,377 @@ bool probe_type_is_available(enum probe_type type)
>  
>  	return ret;
>  }
> +
> +void free_sdt_list(struct list_head *sdt_events)
> +{
> +	struct sdt_event_list *tmp, *ptr;
> +
> +	if (list_empty(sdt_events))
> +		return;
> +	list_for_each_entry_safe(tmp, ptr, sdt_events, list) {
> +		list_del(&tmp->list);
> +		free(tmp->event_info);
> +		free(tmp);
> +	}
> +}
> +
> +static int alloc_exst_sdt_event(struct exst_sdt_event_list **esl)
> +{
> +	struct exst_sdt_event_list *tmp;
> +
> +	tmp = zalloc(sizeof(*tmp));
> +	if (!tmp)
> +		return -ENOMEM;
> +
> +	tmp->tev = zalloc(sizeof(struct probe_trace_event));
> +	if (!tmp->tev) {
> +		free(tmp);
> +		return -ENOMEM;
> +	}
> +
> +	tmp->match = false;
> +	*esl = tmp;
> +	return 0;
> +}
> +
> +static void free_exst_sdt_event(struct exst_sdt_event_list *esl)
> +{
> +	if (!esl)
> +		return;
> +
> +	if (esl->tev) {
> +		free(esl->tev->args);
> +		free(esl->tev);
> +	}
> +
> +	free(esl);
> +}
> +
> +static void probe_file__free_exst_sdt_list(struct exst_sdt_event_list *esl)
> +{
> +	struct list_head *pos, *q;
> +	struct exst_sdt_event_list *tmp;
> +
> +	list_for_each_safe(pos, q, &(esl->list)) {
> +		tmp = list_entry(pos, struct exst_sdt_event_list, list);
> +		free_exst_sdt_event(tmp);
> +		list_del(pos);
> +	}
> +}
> +
> +/*
> + * Look into uprobe_events file and prepare list of sdt events
> + * whose probepoint is already registered.
> + */
> +static int probe_file__get_exst_sdt_list(struct exst_sdt_event_list *esl)
> +{
> +	int fd, ret = 0;
> +	struct strlist *rawlist;
> +	struct str_node *ent;
> +	struct exst_sdt_event_list *tmp = NULL;
> +
> +	fd = probe_file__open(PF_FL_RW | PF_FL_UPROBE);
> +	if (fd < 0)
> +		return fd;
> +
> +	rawlist = probe_file__get_rawlist(fd);
> +
> +	strlist__for_each_entry(ent, rawlist) {
> +		ret = alloc_exst_sdt_event(&tmp);
> +		if (ret < 0)
> +			goto error;
> +
> +		ret = parse_probe_trace_command(ent->s, tmp->tev);
> +		if (ret < 0) {
> +			free_exst_sdt_event(tmp);
> +			goto error;
> +		}
> +
> +		if (!strncmp(tmp->tev->group, "sdt_", 4))
> +			list_add_tail(&(tmp->list), &(esl->list));
> +		else
> +			free_exst_sdt_event(tmp);
> +	}
> +	return 0;
> +
> +error:
> +	probe_file__free_exst_sdt_list(esl);
> +	return ret;
> +}
> +
> +/*
> + * Remove ith tev from pev->tevs list and shift remaining
> + * tevs(i+1 to pev->ntevs) one step.
> + */
> +static void shift_pev(struct perf_probe_event *pev, int i)
> +{
> +	int j;
> +
> +	free(pev->tevs[i].event);
> +	free(pev->tevs[i].group);
> +	free(pev->tevs[i].args);
> +	free(pev->tevs[i].point.realname);
> +	free(pev->tevs[i].point.symbol);
> +	free(pev->tevs[i].point.module);
> +
> +	/*
> +	 * If ith element is last element, no need to shift,
> +	 * just decrement pev->ntevs.
> +	 */
> +	if (i == pev->ntevs - 1)
> +		goto ret;
> +
> +	for (j = i; j < pev->ntevs - 1; j++) {
> +		pev->tevs[j].event          = pev->tevs[j + 1].event;
> +		pev->tevs[j].group          = pev->tevs[j + 1].group;
> +		pev->tevs[j].nargs          = pev->tevs[j + 1].nargs;
> +		pev->tevs[j].uprobes        = pev->tevs[j + 1].uprobes;
> +		pev->tevs[j].args           = pev->tevs[j + 1].args;
> +		pev->tevs[j].point.realname = pev->tevs[j + 1].point.realname;
> +		pev->tevs[j].point.symbol   = pev->tevs[j + 1].point.symbol;
> +		pev->tevs[j].point.module   = pev->tevs[j + 1].point.module;
> +		pev->tevs[j].point.offset   = pev->tevs[j + 1].point.offset;
> +		pev->tevs[j].point.address  = pev->tevs[j + 1].point.address;
> +		pev->tevs[j].point.retprobe = pev->tevs[j + 1].point.retprobe;
> +	}
> +
> +ret:
> +	pev->ntevs--;
> +}

This code smells no good...
Why don't you make a list of newly added probes ?

> +
> +/* Compare address and filename */
> +static bool is_sdt_match(struct probe_trace_event *tev1,
> +			 struct probe_trace_event *tev2)
> +{
> +	return (tev1->point.address == tev2->point.address &&
> +		!(strcmp(tev1->point.module, tev2->point.module)));
> +}
> +
> +/*
> + * Filter out all those pev->tevs which already exists in uprobe_events.
> + * Return 'true' if any matching entry found otherwise return 'false'.
> + */
> +static bool filter_exst_sdt_events_from_pev(struct perf_probe_event *pev,
> +					    struct exst_sdt_event_list *esl)
> +{
> +	int i;
> +	bool ret = false;
> +	struct exst_sdt_event_list *tmp;
> +
> +	list_for_each_entry(tmp, &(esl->list), list) {
> +		for (i = 0; i < pev->ntevs; i++) {
> +			if (is_sdt_match(&(pev->tevs[i]), tmp->tev)) {
> +				shift_pev(pev, i);
> +				tmp->match = true;
> +				ret = true;
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int list_add_sdt_event(struct list_head *sdt_events,
> +			      bool existing_event,
> +			      struct probe_trace_event *tev)
> +{
> +	struct sdt_event_list *tmp;
> +
> +	tmp = zalloc(sizeof(*tmp));
> +	if (!tmp)
> +		return -ENOMEM;
> +
> +	tmp->existing_event = existing_event;
> +
> +	INIT_LIST_HEAD(&tmp->list);
> +	tmp->event_info = zalloc(MAX_EVENT_LENGTH * sizeof(char));
> +	if (!tmp->event_info) {
> +		free(tmp);
> +		return -ENOMEM;
> +	}
> +
> +	snprintf(tmp->event_info, strlen(tev->group) + strlen(tev->event) + 2,
> +		 "%s:%s", tev->group, tev->event);
> +
> +	list_add(&tmp->list, sdt_events);
> +
> +	return 0;
> +}
> +
> +static void print_exst_sdt_events(struct exst_sdt_event_list *tmp)
> +{
> +	static bool msg_head;
> +
> +	if (!msg_head) {
> +		pr_info("Matching event(s) from uprobe_events:\n");
> +		msg_head = true;
> +	}
> +
> +	pr_info("   %s:%s  0x%" PRIx64 "@%s\n", tmp->tev->group,
> +		tmp->tev->event, tmp->tev->point.address,
> +		tmp->tev->point.module);
> +}
> +
> +static void print_exst_sdt_event_footer(void)
> +{
> +	pr_info("Use 'perf probe -d <event>' to delete event(s).\n\n");
> +}
> +
> +/*
> + * If there is entry with the same name in uprobe_events, record it.
> + * Return value  0: no error, not found
> + *              <0: error
> + *              >0: found
> + */
> +static int probe_file__add_exst_sdt_event(struct exst_sdt_event_list *esl,
> +					  struct list_head *sdt_events,
> +					  struct perf_probe_event *pev)
> +{
> +	struct exst_sdt_event_list *tmp;
> +	int ret = 0;
> +
> +	list_for_each_entry(tmp, &(esl->list), list) {
> +		if (strcmp(tmp->tev->group, pev->group) ||
> +		    strcmp(tmp->tev->event, pev->event))
> +			continue;
> +
> +		tmp->match = true;
> +
> +		ret = list_add_sdt_event(sdt_events, true, tmp->tev);
> +		if (ret < 0)
> +			return ret;
> +
> +		print_exst_sdt_events(tmp);
> +		print_exst_sdt_event_footer();
> +		if (pev->ntevs > 1)
> +			pr_warning("Warning: Found %d events from probe-cache with name '%s:%s'.\n"
> +				"\t Since probe point already exists with this name, recording only 1 event.\n"
> +				"Hint: Please use 'perf probe -d %s:%s*' to allow record on all events.\n\n",
> +				pev->ntevs, pev->group, pev->event, pev->group, pev->event);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +int add_sdt_event(char *event, struct list_head *sdt_events,
> +		  struct parse_events_error *err)
> +{
> +	struct perf_probe_event *pev;
> +	int ret = 0, i, ctr = 0, found = 0, exst_ctr = 0;
> +	char *str;
> +	struct exst_sdt_event_list *tmp;
> +	struct exst_sdt_event_list esl;
> +	bool filter;
> +
> +	pev = zalloc(sizeof(*pev));
> +	if (!pev)
> +		return -ENOMEM;
> +
> +	pev->sdt = true;
> +	pev->uprobes = true;
> +
> +	str = strdup(event);
> +	if (!str)
> +		return -ENOMEM;
> +
> +	/*
> +	 * Parse event to find the group name and event name of
> +	 * the sdt event.
> +	 */
> +	ret = parse_perf_probe_event_name(&event, pev);
> +	if (ret) {
> +		pr_err("Error in parsing sdt event %s\n", str);
> +		free(pev);
> +		goto free_str;
> +	}
> +
> +	probe_conf.max_probes = MAX_PROBES;
> +	probe_conf.force_add = 1;
> +
> +	/*
> +	 * Find the sdt event from the cache. We deliberately check failure
> +	 * of this function after checking entries in uprobe_events. Because,
> +	 * we may find matching entry from uprobe_events and in that case we
> +	 * should continue recording that event.
> +	 */
> +	pev->ntevs = find_sdt_events_from_cache(pev, &pev->tevs);
> +
> +	/* Prepare list of existing sdt events from uprobe_events */
> +	INIT_LIST_HEAD(&esl.list);
> +	ret = probe_file__get_exst_sdt_list(&esl);
> +	if (ret < 0)
> +		goto free_str;
> +
> +	/* If there is entry with the same name in uprobe_events, record it. */
> +	found = probe_file__add_exst_sdt_event(&esl, sdt_events, pev);
> +	if (found) {
> +		ret = (found > 0) ? 0 : found;
> +		goto free_str;
> +	}

Curious, this should be done before searching cached SDT, so that
we can just skip listing up existing probes.

> +
> +	/* Reaching here means no matching entry found in uprobe_events. */
> +	filter = filter_exst_sdt_events_from_pev(pev, &esl);
> +	if (!filter && pev->ntevs == 0) {
> +		pr_err("%s:%s not found in the cache\n", pev->group,
> +			pev->event);
> +		ret = -EINVAL;
> +		goto free_str;
> +	} else if (pev->ntevs < 0) {
> +		err->str = strdup("Cache lookup failed.\n");
> +		ret = pev->ntevs;
> +		goto free_str;
> +	}

Again, why is the esl needed here? We can remove events by using it's name.

What I expected was,

0) add "generated_sdts" in record structure.
1) search existing events by using its name from tracing/events/.
(Don't need to touch uprobe_events)
2) if it does not exist, add new probe points and store its name in "generated_sdts".
3) record events
4) remove the events by using "generated_sdts" which only has the event name.

Thank you,

> +
> +	/* Create probe points for new events. */
> +	ret = apply_perf_probe_events(pev, 1);
> +	if (ret) {
> +		pr_err("Error in adding SDT event : %s\n", event);
> +		goto free_str;
> +	}
> +
> +	/* Add existing event names to "sdt_events" list */
> +	list_for_each_entry(tmp, &(esl.list), list) {
> +		if (!tmp->match)
> +			continue;
> +
> +		ret = list_add_sdt_event(sdt_events, true, tmp->tev);
> +		if (ret < 0)
> +			goto free_str;
> +		print_exst_sdt_events(tmp);
> +		ctr++;
> +		exst_ctr++;
> +	}
> +	if (exst_ctr)
> +		print_exst_sdt_event_footer();
> +
> +	/* Add new event names to "sdt_events" list */
> +	for (i = 0; i < pev->ntevs; i++) {
> +		ret = list_add_sdt_event(sdt_events, false, &(pev->tevs[i]));
> +		if (ret < 0)
> +			goto free_str;
> +
> +		ctr++;
> +	}
> +
> +	/* Print warning for multiple events */
> +	if (ctr > 1)
> +		pr_warning("Warning: Recording on %d occurrences of %s:%s\n",
> +			   ctr, pev->group, pev->event);
> +
> +free_str:
> +	/*
> +	 * User may ask for multiple events in the same record command like,
> +	 *    perf record -a -e sdt_lib1:* -e sdt_a:b
> +	 *
> +	 * If sdt_lib1:* events are already added and there is some failure
> +	 * for sdt_a:b, we need to clean sdt_lib1:* events from
> +	 * record.sdt_event_list
> +	 */
> +	if (ret < 0)
> +		sdt_event_list__remove();
> +
> +	free(str);
> +	probe_file__free_exst_sdt_list(&esl);
> +	cleanup_perf_probe_events(pev, 1);
> +	return ret;
> +}
> diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
> index eba44c3..e785eb6 100644
> --- a/tools/perf/util/probe-file.h
> +++ b/tools/perf/util/probe-file.h
> @@ -4,6 +4,7 @@
>  #include "strlist.h"
>  #include "strfilter.h"
>  #include "probe-event.h"
> +#include "parse-events.h"
>  
>  /* Cache of probe definitions */
>  struct probe_cache_entry {
> @@ -19,6 +20,28 @@ struct probe_cache {
>  	struct list_head entries;
>  };
>  
> +struct sdt_event_list {
> +	char *event_info;
> +	struct list_head list;
> +
> +	/*
> +	 * Flag to check whether this event already exists in urobe_events
> +	 * file. This helps at a time of freeing sdt_event_list.
> +	 */
> +	bool existing_event;
> +};
> +
> +/*
> + * This maintains list of sdt events which are already exists in
> + * uprobe_events file at the time of creating probe point for any
> + * sdt event with 'perf record'.
> + */
> +struct exst_sdt_event_list {
> +	struct probe_trace_event *tev;
> +	struct list_head list;
> +	bool match;	/* Does this event match with any new event */
> +};
> +
>  enum probe_type {
>  	PROBE_TYPE_U = 0,
>  	PROBE_TYPE_S,
> @@ -64,6 +87,10 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
>  					const char *group, const char *event);
>  int probe_cache__show_all_caches(struct strfilter *filter);
>  bool probe_type_is_available(enum probe_type type);
> +int add_sdt_event(char *event, struct list_head *sdt_events,
> +		  struct parse_events_error *err);
> +void remove_sdt_event_list(struct list_head *sdt_event_list);
> +void free_sdt_list(struct list_head *sdt_events);
>  #else	/* ! HAVE_LIBELF_SUPPORT */
>  static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused)
>  {
> -- 
> 2.9.3
> 


-- 
Masami Hiramatsu <mhiramat@...nel.org>