[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241112181214.1171244-3-acme@kernel.org>
Date: Tue, 12 Nov 2024 15:12:12 -0300
From: Arnaldo Carvalho de Melo <acme@...nel.org>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Ingo Molnar <mingo@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Kan Liang <kan.liang@...ux.intel.com>,
Clark Williams <williams@...hat.com>,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
Arnaldo Carvalho de Melo <acme@...hat.com>,
Gabriele Monaco <gmonaco@...hat.com>
Subject: [PATCH 2/4] perf ftrace latency: Introduce --bucket-range to ask for linear bucketing
From: Arnaldo Carvalho de Melo <acme@...hat.com>
In addition to showing it exponentially, using log2() to figure out the
histogram index, allow for showing it linearly:
The preexisting mode, the default:
# perf ftrace latency --use-nsec --use-bpf \
-T switch_mm_irqs_off -a sleep 2
# DURATION | COUNT | GRAPH |
0 - 1 ns | 0 | |
1 - 2 ns | 0 | |
2 - 4 ns | 0 | |
4 - 8 ns | 0 | |
8 - 16 ns | 0 | |
16 - 32 ns | 0 | |
32 - 64 ns | 0 | |
64 - 128 ns | 238 | # |
128 - 256 ns | 1704 | ########## |
256 - 512 ns | 672 | ### |
512 - 1024 ns | 4458 | ########################## |
1 - 2 us | 677 | #### |
2 - 4 us | 5 | |
4 - 8 us | 0 | |
8 - 16 us | 0 | |
16 - 32 us | 0 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - ... ms | 0 | |
#
The new histogram mode:
# perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \
-T switch_mm_irqs_off -a sleep 2
# DURATION | COUNT | GRAPH |
0 - 1 ns | 0 | |
1 - 151 ns | 265 | # |
151 - 301 ns | 1797 | ########### |
301 - 451 ns | 258 | # |
451 - 601 ns | 289 | # |
601 - 751 ns | 2049 | ############# |
751 - 901 ns | 967 | ###### |
901 - 1051 ns | 513 | ### |
1.05 - 1.20 us | 114 | |
1.20 - 1.35 us | 559 | ### |
1.35 - 1.50 us | 189 | # |
1.50 - 1.65 us | 137 | |
1.65 - 1.80 us | 32 | |
1.80 - 1.95 us | 2 | |
1.95 - 2.10 us | 0 | |
2.10 - 2.25 us | 1 | |
2.25 - 2.40 us | 1 | |
2.40 - 2.55 us | 0 | |
2.55 - 2.70 us | 0 | |
2.70 - 2.85 us | 0 | |
2.85 - 3.00 us | 1 | |
3.00 - ... us | 4 | |
#
Co-developed-by: Gabriele Monaco <gmonaco@...hat.com>
Signed-off-by: Gabriele Monaco <gmonaco@...hat.com>
Cc: Adrian Hunter <adrian.hunter@...el.com>
Cc: Ian Rogers <irogers@...gle.com>
Cc: Jiri Olsa <jolsa@...nel.org>
Cc: Kan Liang <kan.liang@...ux.intel.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
tools/perf/Documentation/perf-ftrace.txt | 3 +
tools/perf/builtin-ftrace.c | 66 +++++++++++++++++----
tools/perf/util/bpf_ftrace.c | 2 +
tools/perf/util/bpf_skel/func_latency.bpf.c | 14 +++++
tools/perf/util/ftrace.h | 1 +
5 files changed, 73 insertions(+), 13 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index eaec8253be681a0e..e8cc8208e29fca7e 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
--use-nsec::
Use nano-second instead of micro-second as a base unit of the histogram.
+--bucket-range=::
+ Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.
+
OPTIONS for 'perf ftrace profile'
---------------------------------
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 88b9f0597b925c69..e047e5dcda2656df 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (ftrace->use_nsec)
num *= 1000;
- i = log2(num);
- if (i < 0)
+ if (!ftrace->bucket_range) {
+ i = log2(num);
+ if (i < 0)
+ i = 0;
+ } else {
+ // Less than 1 unit (ms or ns), or, in the future,
+ // than the min latency desired.
i = 0;
+ if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
+ i = num / ftrace->bucket_range + 1;
+ }
if (i >= NUM_BUCKET)
i = NUM_BUCKET - 1;
@@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
" DURATION ", "COUNT", bar_total, "GRAPH");
bar_len = buckets[0] * bar_total / total;
- printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+
+ printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < NUM_BUCKET - 1; i++) {
- int start = (1 << (i - 1));
- int stop = 1 << i;
+ int start, stop;
const char *unit = use_nsec ? "ns" : "us";
- if (start >= 1024) {
- start >>= 10;
- stop >>= 10;
- unit = use_nsec ? "us" : "ms";
+ if (!ftrace->bucket_range) {
+ start = (1 << (i - 1));
+ stop = 1 << i;
+
+ if (start >= 1024) {
+ start >>= 10;
+ stop >>= 10;
+ unit = use_nsec ? "us" : "ms";
+ }
+ } else {
+ start = (i - 1) * ftrace->bucket_range + 1;
+ stop = i * ftrace->bucket_range + 1;
+
+ if (start >= 1000) {
+ double dstart = start / 1000.0,
+ dstop = stop / 1000.0;
+ printf(" %4.2f - %-4.2f", dstart, dstop);
+ unit = use_nsec ? "us" : "ms";
+ goto print_bucket_info;
+ }
}
+
+ printf(" %4d - %4d", start, stop);
+print_bucket_info:
bar_len = buckets[i] * bar_total / total;
- printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
- start, stop, unit, buckets[i], bar_len, bar,
+ printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
- printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
- 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+ if (!ftrace->bucket_range) {
+ printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
+ } else {
+ int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;
+
+ if (upper_outlier >= 1000) {
+ double dstart = upper_outlier / 1000.0;
+
+ printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
+ } else {
+ printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
+ }
+ }
+ printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");
}
@@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
#endif
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
"Use nano-second histogram"),
+ OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
+ "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 06d1c4018407a265..b3cb68295e56631c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}
+ skel->rodata->bucket_range = ftrace->bucket_range;
+
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index f613dc9cb123480c..00a340ca1543dff0 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -41,6 +41,7 @@ int enabled = 0;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
+const volatile unsigned int bucket_range;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -100,12 +101,25 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
+ if (bucket_range != 0) {
+ delta /= cmp_base;
+ // Less than 1 unit (ms or ns), or, in the future,
+ // than the min latency desired.
+ key = 0;
+ if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = delta / bucket_range + 1;
+ if (key >= NUM_BUCKET)
+ key = NUM_BUCKET - 1;
+ }
+ goto do_lookup;
+ }
// calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
if (delta < (cmp_base << key))
break;
}
+do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index bae649ef50e8447a..6ac136484349a9a5 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -20,6 +20,7 @@ struct perf_ftrace {
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
+ unsigned int bucket_range;
int graph_depth;
int func_stack_trace;
int func_irq_info;
--
2.47.0
Powered by blists - more mailing lists