lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aS_PYGilTIpuVZt8@google.com>
Date: Tue, 2 Dec 2025 21:49:20 -0800
From: Namhyung Kim <namhyung@...nel.org>
To: Arnaldo Carvalho de Melo <acme@...nel.org>,
	Ian Rogers <irogers@...gle.com>,
	James Clark <james.clark@...aro.org>
Cc: Jiri Olsa <jolsa@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...nel.org>, LKML <linux-kernel@...r.kernel.org>,
	linux-perf-users@...r.kernel.org,
	Steven Rostedt <rostedt@...dmis.org>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	Indu Bhagat <indu.bhagat@...cle.com>,
	Jens Remus <jremus@...ux.ibm.com>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	linux-trace-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH v6 3/6] perf record: Add --call-graph fp,defer option for
 deferred callchains

On Thu, Nov 20, 2025 at 03:48:01PM -0800, Namhyung Kim wrote:
> Add a new callchain record mode option for deferred callchains.  For now
> it only works with FP (frame-pointer) mode.
> 
> And add the missing feature detection logic to clear the flag on old
> kernels.
> 
>   $ perf record --call-graph fp,defer -vv true
>   ...
>   ------------------------------------------------------------
>   perf_event_attr:
>     type                             0 (PERF_TYPE_HARDWARE)
>     size                             136
>     config                           0 (PERF_COUNT_HW_CPU_CYCLES)
>     { sample_period, sample_freq }   4000
>     sample_type                      IP|TID|TIME|CALLCHAIN|PERIOD
>     read_format                      ID|LOST
>     disabled                         1
>     inherit                          1
>     mmap                             1
>     comm                             1
>     freq                             1
>     enable_on_exec                   1
>     task                             1
>     sample_id_all                    1
>     mmap2                            1
>     comm_exec                        1
>     ksymbol                          1
>     bpf_event                        1
>     defer_callchain                  1
>     defer_output                     1
>   ------------------------------------------------------------
>   sys_perf_event_open: pid 162755  cpu 0  group_fd -1  flags 0x8
>   sys_perf_event_open failed, error -22
>   switching off deferred callchain support
> 
> Reviewed-by: Ian Rogers <irogers@...gle.com>
> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> ---
>  tools/perf/Documentation/perf-config.txt |  3 +++
>  tools/perf/Documentation/perf-record.txt |  4 ++++
>  tools/perf/util/callchain.c              | 16 +++++++++++++---
>  tools/perf/util/callchain.h              |  1 +
>  tools/perf/util/evsel.c                  | 19 +++++++++++++++++++
>  tools/perf/util/evsel.h                  |  1 +
>  6 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
> index c6f33565966735fe..642d1c490d9e3bcd 100644
> --- a/tools/perf/Documentation/perf-config.txt
> +++ b/tools/perf/Documentation/perf-config.txt
> @@ -452,6 +452,9 @@ Variables
>  		kernel space is controlled not by this option but by the
>  		kernel config (CONFIG_UNWINDER_*).
>  
> +		The 'defer' mode can be used with 'fp' mode to enable deferred
> +		user callchains (like 'fp,defer').
> +
>  	call-graph.dump-size::
>  		The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
>  		When using dwarf into record-mode, the default size will be used if omitted.
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 067891bd7da6edc8..e8b9aadbbfa50574 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -325,6 +325,10 @@ OPTIONS
>  	by default.  User can change the number by passing it after comma
>  	like "--call-graph fp,32".
>  
> +	Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
> +	enable deferred user callchain which will collect user-space callchains
> +	when the thread returns to the user space.
> +
>  -q::
>  --quiet::
>  	Don't print any warnings or messages, useful for scripting.
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index d7b7eef740b9d6ed..2884187ccbbecfdc 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
>  			if (tok) {
>  				unsigned long size;
>  
> -				size = strtoul(tok, &name, 0);
> -				if (size < (unsigned) sysctl__max_stack())
> -					param->max_stack = size;
> +				if (!strncmp(tok, "defer", sizeof("defer"))) {
> +					param->defer = true;
> +				} else {
> +					size = strtoul(tok, &name, 0);
> +					if (size < (unsigned) sysctl__max_stack())
> +						param->max_stack = size;
> +				}
>  			}
>  			break;
>  
> @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
>  	} while (0);
>  
>  	free(buf);
> +
> +	if (param->defer && param->record_mode != CALLCHAIN_FP) {
> +		pr_err("callchain: deferred callchain only works with FP\n");
> +		return -EINVAL;
> +	}
> +
>  	return ret;
>  }
>  
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
>  
>  struct callchain_param {
>  	bool			enabled;
> +	bool			defer;
>  	enum perf_call_graph_mode record_mode;
>  	u32			dump_size;
>  	enum chain_mode 	mode;
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index f1a311637694ac0a..887c6ac6c49cc415 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
>  		pr_info("Disabling user space callchains for function trace event.\n");
>  		attr->exclude_callchain_user = 1;
>  	}
> +
> +	if (param->defer && !attr->exclude_callchain_user)
> +		attr->defer_callchain = 1;
>  }
>  
>  void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
> @@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
>  	attr->mmap2    = track && !perf_missing_features.mmap2;
>  	attr->comm     = track;
>  	attr->build_id = track && opts->build_id;
> +	attr->defer_output = track && callchain->defer;

I need to update this line as shown below, so that callchain is checked
for NULL before callchain->defer is dereferenced:

	attr->defer_output = track && callchain && callchain->defer;

Thanks,
Namhyung

>  
>  	/*
>  	 * ksymbol is tracked separately with text poke because it needs to be
> @@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>  
>  static void evsel__disable_missing_features(struct evsel *evsel)
>  {
> +	if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
> +		evsel->core.attr.defer_callchain = 0;
> +	if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
> +		evsel->core.attr.defer_output = 0;
>  	if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
>  	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
>  		evsel->core.attr.inherit = 0;
> @@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
>  
>  	/* Please add new feature detection here. */
>  
> +	attr.defer_callchain = true;
> +	if (has_attr_feature(&attr, /*flags=*/0))
> +		goto found;
> +	perf_missing_features.defer_callchain = true;
> +	pr_debug2("switching off deferred callchain support\n");
> +	attr.defer_callchain = false;
> +
>  	attr.inherit = true;
>  	attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
>  	if (has_attr_feature(&attr, /*flags=*/0))
> @@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
>  	errno = old_errno;
>  
>  check:
> +	if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
> +	    perf_missing_features.defer_callchain)
> +		return true;
> +
>  	if (evsel->core.attr.inherit &&
>  	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
>  	    perf_missing_features.inherit_sample_read)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -221,6 +221,7 @@ struct perf_missing_features {
>  	bool branch_counters;
>  	bool aux_action;
>  	bool inherit_sample_read;
> +	bool defer_callchain;
>  };
>  
>  extern struct perf_missing_features perf_missing_features;
> -- 
> 2.52.0.rc2.455.g230fcf2819-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ