[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220321234609.90455-1-namhyung@kernel.org>
Date: Mon, 21 Mar 2022 16:46:08 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Arnaldo Carvalho de Melo <acme@...nel.org>,
Jiri Olsa <jolsa@...hat.com>
Cc: Ingo Molnar <mingo@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
LKML <linux-kernel@...r.kernel.org>,
Andi Kleen <ak@...ux.intel.com>,
Ian Rogers <irogers@...gle.com>,
Stephane Eranian <eranian@...gle.com>,
Changbin Du <changbin.du@...il.com>
Subject: [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
Sometimes we want to see nano-second granularity.
$ sudo perf ftrace latency -T dput -a sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 2098375 | ############################# |
1 - 2 us | 61 | |
2 - 4 us | 33 | |
4 - 8 us | 13 | |
8 - 16 us | 124 | |
16 - 32 us | 123 | |
32 - 64 us | 1 | |
64 - 128 us | 0 | |
128 - 256 us | 1 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 0 | |
16 - 32 ms | 0 | |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
$ sudo perf ftrace latency -T dput -a -n sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 0 | |
1 - 2 ns | 0 | |
2 - 4 ns | 0 | |
4 - 8 ns | 0 | |
8 - 16 ns | 0 | |
16 - 32 ns | 0 | |
32 - 64 ns | 0 | |
64 - 128 ns | 1163434 | ############## |
128 - 256 ns | 914102 | ############# |
256 - 512 ns | 884 | |
512 - 1024 ns | 613 | |
1 - 2 us | 31 | |
2 - 4 us | 17 | |
4 - 8 us | 7 | |
8 - 16 us | 123 | |
16 - 32 us | 83 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - ... ms | 0 | |
Cc: Changbin Du <changbin.du@...il.com>
Signed-off-by: Namhyung Kim <namhyung@...nel.org>
---
tools/perf/builtin-ftrace.c | 24 +++++++++++++--------
tools/perf/util/bpf_ftrace.c | 2 ++
tools/perf/util/bpf_skel/func_latency.bpf.c | 6 ++++--
tools/perf/util/ftrace.h | 1 +
4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+ bool use_nsec)
{
char *p, *q;
char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
if (!unit || strncmp(unit, " us", 3))
goto next;
+ if (use_nsec)
+ num *= 1000;
+
i = log2(num);
if (i < 0)
i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
strcat(linebuf, p);
}
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
{
int i;
int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
- const char *unit = "us";
+ const char *unit = use_nsec ? "ns" : "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
- unit = "ms";
+ unit = use_nsec ? "us" : "ms";
}
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
- 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
- bar_total - bar_len, "");
+ 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+ bar_len, bar, bar_total - bar_len, "");
}
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
if (n < 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
}
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
read_func_latency(ftrace, buckets);
- display_histogram(buckets);
+ display_histogram(buckets, ftrace->use_nsec);
out:
close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
+ OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+ "Use nano-second histogram"),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->use_nsec = ftrace->use_nsec;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
- // calculate index using delta in usec
+ // calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
- if (delta < ((1000UL) << key))
+ if (delta < (cmp_base << key))
break;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
struct list_head nograph_funcs;
unsigned long percpu_buffer_size;
bool inherit;
+ bool use_nsec;
int graph_depth;
int func_stack_trace;
int func_irq_info;
--
2.35.1.894.gb6a874cedc-goog
Powered by blists - more mailing lists