[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fUPksNCJ-NqUbJMDpfS7kkmXGsCVhvALkts8HDv42NUyg@mail.gmail.com>
Date: Mon, 18 Nov 2024 17:03:41 -0800
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, Kan Liang <kan.liang@...ux.intel.com>,
James Clark <james.clark@...aro.org>, Howard Chu <howardchu95@...il.com>,
Athira Jajeev <atrajeev@...ux.vnet.ibm.com>, Michael Petlan <mpetlan@...hat.com>,
Veronika Molnarova <vmolnaro@...hat.com>, Dapeng Mi <dapeng1.mi@...ux.intel.com>,
Thomas Richter <tmricht@...ux.ibm.com>, Ilya Leoshkevich <iii@...ux.ibm.com>,
Colin Ian King <colin.i.king@...il.com>, Weilin Wang <weilin.wang@...el.com>,
Andi Kleen <ak@...ux.intel.com>, Josh Poimboeuf <jpoimboe@...hat.com>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, Arnaldo Carvalho de Melo <acme@...hat.com>
Subject: Re: [PATCH v6 15/22] perf lock: Move common lock contention code to
new file
On Mon, Nov 18, 2024 at 4:23 PM Namhyung Kim <namhyung@...nel.org> wrote:
>
> On Fri, Nov 08, 2024 at 10:18:02PM -0800, Ian Rogers wrote:
> > Avoid references from util code to builtin-lock that require python
> > stubs. Move the functions and related variables to
> > util/lock-contention.c. Add max_stack_depth parameter to
> > match_callstack_filter to avoid sharing a global variable.
> >
> > Signed-off-by: Ian Rogers <irogers@...gle.com>
> > Acked-by: Arnaldo Carvalho de Melo <acme@...hat.com>
> > ---
> > tools/perf/builtin-lock.c | 137 +--------------------
> > tools/perf/util/Build | 1 +
> > tools/perf/util/bpf_lock_contention.c | 2 +-
> > tools/perf/util/lock-contention.c | 170 ++++++++++++++++++++++++++
> > tools/perf/util/lock-contention.h | 37 ++----
> > tools/perf/util/python.c | 17 ---
> > 6 files changed, 185 insertions(+), 179 deletions(-)
> > create mode 100644 tools/perf/util/lock-contention.c
> >
> > diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> > index 062e2b56a2ab..f66948b1fbed 100644
> > --- a/tools/perf/builtin-lock.c
> > +++ b/tools/perf/builtin-lock.c
> > @@ -46,15 +46,6 @@
> > static struct perf_session *session;
> > static struct target target;
> >
> > -/* based on kernel/lockdep.c */
> > -#define LOCKHASH_BITS 12
> > -#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
> > -
> > -static struct hlist_head *lockhash_table;
> > -
> > -#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
> > -#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
> > -
> > static struct rb_root thread_stats;
> >
> > static bool combine_locks;
> > @@ -67,24 +58,13 @@ static unsigned long bpf_map_entries = MAX_ENTRIES;
> > static int max_stack_depth = CONTENTION_STACK_DEPTH;
> > static int stack_skip = CONTENTION_STACK_SKIP;
> > static int print_nr_entries = INT_MAX / 2;
> > -static LIST_HEAD(callstack_filters);
> > static const char *output_name = NULL;
> > static FILE *lock_output;
> >
> > -struct callstack_filter {
> > - struct list_head list;
> > - char name[];
> > -};
> > -
> > static struct lock_filter filters;
> >
> > static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
> >
> > -static bool needs_callstack(void)
> > -{
> > - return !list_empty(&callstack_filters);
> > -}
> > -
> > static struct thread_stat *thread_stat_find(u32 tid)
> > {
> > struct rb_node *node;
> > @@ -477,93 +457,6 @@ static struct lock_stat *pop_from_result(void)
> > return container_of(node, struct lock_stat, rb);
> > }
> >
> > -struct lock_stat *lock_stat_find(u64 addr)
> > -{
> > - struct hlist_head *entry = lockhashentry(addr);
> > - struct lock_stat *ret;
> > -
> > - hlist_for_each_entry(ret, entry, hash_entry) {
> > - if (ret->addr == addr)
> > - return ret;
> > - }
> > - return NULL;
> > -}
> > -
> > -struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
> > -{
> > - struct hlist_head *entry = lockhashentry(addr);
> > - struct lock_stat *ret, *new;
> > -
> > - hlist_for_each_entry(ret, entry, hash_entry) {
> > - if (ret->addr == addr)
> > - return ret;
> > - }
> > -
> > - new = zalloc(sizeof(struct lock_stat));
> > - if (!new)
> > - goto alloc_failed;
> > -
> > - new->addr = addr;
> > - new->name = strdup(name);
> > - if (!new->name) {
> > - free(new);
> > - goto alloc_failed;
> > - }
> > -
> > - new->flags = flags;
> > - new->wait_time_min = ULLONG_MAX;
> > -
> > - hlist_add_head(&new->hash_entry, entry);
> > - return new;
> > -
> > -alloc_failed:
> > - pr_err("memory allocation failed\n");
> > - return NULL;
> > -}
> > -
> > -bool match_callstack_filter(struct machine *machine, u64 *callstack)
> > -{
> > - struct map *kmap;
> > - struct symbol *sym;
> > - u64 ip;
> > - const char *arch = perf_env__arch(machine->env);
> > -
> > - if (list_empty(&callstack_filters))
> > - return true;
> > -
> > - for (int i = 0; i < max_stack_depth; i++) {
> > - struct callstack_filter *filter;
> > -
> > - /*
> > - * In powerpc, the callchain saved by kernel always includes
> > - * first three entries as the NIP (next instruction pointer),
> > - * LR (link register), and the contents of LR save area in the
> > - * second stack frame. In certain scenarios its possible to have
> > - * invalid kernel instruction addresses in either LR or the second
> > - * stack frame's LR. In that case, kernel will store that address as
> > - * zero.
> > - *
> > - * The below check will continue to look into callstack,
> > - * incase first or second callstack index entry has 0
> > - * address for powerpc.
> > - */
> > - if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
> > - (i != 1 && i != 2))))
> > - break;
> > -
> > - ip = callstack[i];
> > - sym = machine__find_kernel_symbol(machine, ip, &kmap);
> > - if (sym == NULL)
> > - continue;
> > -
> > - list_for_each_entry(filter, &callstack_filters, list) {
> > - if (strstr(sym->name, filter->name))
> > - return true;
> > - }
> > - }
> > - return false;
> > -}
> > -
> > struct trace_lock_handler {
> > /* it's used on CONFIG_LOCKDEP */
> > int (*acquire_event)(struct evsel *evsel,
> > @@ -1165,7 +1058,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
> > if (callstack == NULL)
> > return -ENOMEM;
> >
> > - if (!match_callstack_filter(machine, callstack)) {
> > + if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
> > free(callstack);
> > return 0;
> > }
> > @@ -2449,34 +2342,6 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
> > return ret;
> > }
> >
> > -static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
> > - int unset __maybe_unused)
> > -{
> > - char *s, *tmp, *tok;
> > - int ret = 0;
> > -
> > - s = strdup(str);
> > - if (s == NULL)
> > - return -1;
> > -
> > - for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
> > - struct callstack_filter *entry;
> > -
> > - entry = malloc(sizeof(*entry) + strlen(tok) + 1);
> > - if (entry == NULL) {
> > - pr_err("Memory allocation failure\n");
> > - free(s);
> > - return -1;
> > - }
> > -
> > - strcpy(entry->name, tok);
> > - list_add_tail(&entry->list, &callstack_filters);
> > - }
> > -
> > - free(s);
> > - return ret;
> > -}
> > -
> > static int parse_output(const struct option *opt __maybe_unused, const char *str,
> > int unset __maybe_unused)
> > {
> > diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> > index 340544a6f5ec..3c6cd8d81d88 100644
> > --- a/tools/perf/util/Build
> > +++ b/tools/perf/util/Build
> > @@ -121,6 +121,7 @@ perf-util-y += topdown.o
> > perf-util-y += iostat.o
> > perf-util-y += stream.o
> > perf-util-y += kvm-stat.o
> > +perf-util-y += lock-contention.o
> > perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
> > perf-util-y += intel-pt-decoder/
> > perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
> > diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> > index 41a1ad087895..37e17c56f106 100644
> > --- a/tools/perf/util/bpf_lock_contention.c
> > +++ b/tools/perf/util/bpf_lock_contention.c
> > @@ -458,7 +458,7 @@ int lock_contention_read(struct lock_contention *con)
> > if (con->save_callstack) {
> > bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
> >
> > - if (!match_callstack_filter(machine, stack_trace)) {
> > + if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
> > con->nr_filtered += data.count;
> > goto next;
> > }
> > diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c
> > new file mode 100644
> > index 000000000000..841bb18b1f06
> > --- /dev/null
> > +++ b/tools/perf/util/lock-contention.c
> > @@ -0,0 +1,170 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include "debug.h"
> > +#include "env.h"
> > +#include "lock-contention.h"
> > +#include "machine.h"
> > +#include "symbol.h"
> > +
> > +#include <limits.h>
> > +#include <string.h>
> > +
> > +#include <linux/hash.h>
> > +#include <linux/zalloc.h>
> > +
> > +#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
> > +#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
> > +
> > +struct callstack_filter {
> > + struct list_head list;
> > + char name[];
> > +};
> > +
> > +static LIST_HEAD(callstack_filters);
> > +struct hlist_head *lockhash_table;
> > +
> > +int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
> > + int unset __maybe_unused)
> > +{
> > + char *s, *tmp, *tok;
> > + int ret = 0;
> > +
> > + s = strdup(str);
> > + if (s == NULL)
> > + return -1;
> > +
> > + for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
> > + struct callstack_filter *entry;
> > +
> > + entry = malloc(sizeof(*entry) + strlen(tok) + 1);
> > + if (entry == NULL) {
> > + pr_err("Memory allocation failure\n");
> > + free(s);
> > + return -1;
> > + }
> > +
> > + strcpy(entry->name, tok);
> > + list_add_tail(&entry->list, &callstack_filters);
> > + }
> > +
> > + free(s);
> > + return ret;
> > +}
> > +
> > +bool needs_callstack(void)
> > +{
> > + return !list_empty(&callstack_filters);
> > +}
> > +
> > +struct lock_stat *lock_stat_find(u64 addr)
> > +{
> > + struct hlist_head *entry = lockhashentry(addr);
> > + struct lock_stat *ret;
> > +
> > + hlist_for_each_entry(ret, entry, hash_entry) {
> > + if (ret->addr == addr)
> > + return ret;
> > + }
> > + return NULL;
> > +}
> > +
> > +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
> > +{
> > + struct hlist_head *entry = lockhashentry(addr);
> > + struct lock_stat *ret, *new;
> > +
> > + hlist_for_each_entry(ret, entry, hash_entry) {
> > + if (ret->addr == addr)
> > + return ret;
> > + }
> > +
> > + new = zalloc(sizeof(struct lock_stat));
> > + if (!new)
> > + goto alloc_failed;
> > +
> > + new->addr = addr;
> > + new->name = strdup(name);
> > + if (!new->name) {
> > + free(new);
> > + goto alloc_failed;
> > + }
> > +
> > + new->flags = flags;
> > + new->wait_time_min = ULLONG_MAX;
> > +
> > + hlist_add_head(&new->hash_entry, entry);
> > + return new;
> > +
> > +alloc_failed:
> > + pr_err("memory allocation failed\n");
> > + return NULL;
> > +}
> > +
> > +bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth)
> > +{
> > + struct map *kmap;
> > + struct symbol *sym;
> > + u64 ip;
> > + const char *arch = perf_env__arch(machine->env);
> > +
> > + if (list_empty(&callstack_filters))
> > + return true;
> > +
> > + for (int i = 0; i < max_stack_depth; i++) {
> > + struct callstack_filter *filter;
> > +
> > + /*
> > + * In powerpc, the callchain saved by kernel always includes
> > + * first three entries as the NIP (next instruction pointer),
> > + * LR (link register), and the contents of LR save area in the
> > + * second stack frame. In certain scenarios its possible to have
> > + * invalid kernel instruction addresses in either LR or the second
> > + * stack frame's LR. In that case, kernel will store that address as
> > + * zero.
> > + *
> > + * The below check will continue to look into callstack,
> > + * incase first or second callstack index entry has 0
> > + * address for powerpc.
> > + */
> > + if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
> > + (i != 1 && i != 2))))
> > + break;
> > +
> > + ip = callstack[i];
> > + sym = machine__find_kernel_symbol(machine, ip, &kmap);
> > + if (sym == NULL)
> > + continue;
> > +
> > + list_for_each_entry(filter, &callstack_filters, list) {
> > + if (strstr(sym->name, filter->name))
> > + return true;
> > + }
> > + }
> > + return false;
> > +}
> > +
> > +#ifndef HAVE_BPF_SKEL
> > +int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> > +{
> > + return 0;
> > +}
> > +
> > +int lock_contention_start(void)
> > +{
> > + return 0;
> > +}
> > +
> > +int lock_contention_stop(void)
> > +{
> > + return 0;
> > +}
> > +
> > +int lock_contention_finish(struct lock_contention *con __maybe_unused)
> > +{
> > + return 0;
> > +}
> > +
> > +int lock_contention_read(struct lock_contention *con __maybe_unused)
> > +{
> > + return 0;
> > +}
> > +#endif /* !HAVE_BPF_SKEL */
>
> I still think it's the convention to have them in a header file as
> static inline functions and reduce the #ifdef in the .c file.
Shouldn't minimizing ifdefs, and associated cognitive load, in header
files be the priority given they are #included many times while the .c
file is only compiled once?
Shouldn't a goal of the header file be to abstract away things like
HAVE_BPF_SKEL?
I'm not clear what the goal of having the functions in the header
files is, performance? The code isn't going to run anyway. I feel
lock_contention.h is smaller and easier to read like this but I also
don't care enough to fight. I did this change here as
lock_contention.h was being brought into python.c for the sake of
stubbing out functions that the header file was also subbing out for
!BPF_HAVE_SKEL. A single stub felt like progress.
Thanks,
Ian
> > diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
> > index 1a7248ff3889..bfa5c7db0a5d 100644
> > --- a/tools/perf/util/lock-contention.h
> > +++ b/tools/perf/util/lock-contention.h
> > @@ -67,10 +67,11 @@ struct lock_stat {
> > */
> > #define MAX_LOCK_DEPTH 48
> >
> > -struct lock_stat *lock_stat_find(u64 addr);
> > -struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
> > +/* based on kernel/lockdep.c */
> > +#define LOCKHASH_BITS 12
> > +#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
> >
> > -bool match_callstack_filter(struct machine *machine, u64 *callstack);
> > +extern struct hlist_head *lockhash_table;
> >
> > /*
> > * struct lock_seq_stat:
> > @@ -148,7 +149,14 @@ struct lock_contention {
> > bool save_callstack;
> > };
> >
> > -#ifdef HAVE_BPF_SKEL
> > +struct option;
> > +int parse_call_stack(const struct option *opt, const char *str, int unset);
> > +bool needs_callstack(void);
> > +
> > +struct lock_stat *lock_stat_find(u64 addr);
> > +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
> > +
> > +bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth);
> >
> > int lock_contention_prepare(struct lock_contention *con);
> > int lock_contention_start(void);
> > @@ -156,25 +164,4 @@ int lock_contention_stop(void);
> > int lock_contention_read(struct lock_contention *con);
> > int lock_contention_finish(struct lock_contention *con);
> >
> > -#else /* !HAVE_BPF_SKEL */
> > -
> > -static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
> > -{
> > - return 0;
> > -}
> > -
> > -static inline int lock_contention_start(void) { return 0; }
> > -static inline int lock_contention_stop(void) { return 0; }
> > -static inline int lock_contention_finish(struct lock_contention *con __maybe_unused)
> > -{
> > - return 0;
> > -}
> > -
> > -static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
> > -{
> > - return 0;
> > -}
> > -
> > -#endif /* HAVE_BPF_SKEL */
> > -
> > #endif /* PERF_LOCK_CONTENTION_H */
> > diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
> > index 35d84a96dbec..91fd444615cd 100644
> > --- a/tools/perf/util/python.c
> > +++ b/tools/perf/util/python.c
> > @@ -18,7 +18,6 @@
> > #include "mmap.h"
> > #include "util/kwork.h"
> > #include "util/sample.h"
> > -#include "util/lock-contention.h"
> > #include <internal/lib.h>
> > #include "../builtin.h"
> >
> > @@ -1311,22 +1310,6 @@ struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork __maybe_unused,
> > return NULL;
> > }
> >
> > -bool match_callstack_filter(struct machine *machine __maybe_unused, u64 *callstack __maybe_unused)
> > -{
> > - return false;
> > -}
> > -
> > -struct lock_stat *lock_stat_find(u64 addr __maybe_unused)
> > -{
> > - return NULL;
> > -}
> > -
> > -struct lock_stat *lock_stat_findnew(u64 addr __maybe_unused, const char *name __maybe_unused,
> > - int flags __maybe_unused)
> > -{
> > - return NULL;
> > -}
> > -
> > int cmd_inject(int argc __maybe_unused, const char *argv[] __maybe_unused)
> > {
> > return -1;
> > --
> > 2.47.0.277.g8800431eea-goog
> >
Powered by blists - more mailing lists