[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fV3JAJD_beASNdP_Sxh=oAFv-yh_cqW=P=Bc9FabddUpQ@mail.gmail.com>
Date: Tue, 5 Nov 2024 09:41:05 -0800
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>, Kan Liang <kan.liang@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
LKML <linux-kernel@...r.kernel.org>, linux-perf-users@...r.kernel.org,
Song Liu <song@...nel.org>, bpf@...r.kernel.org,
Stephane Eranian <eranian@...gle.com>, Vlastimil Babka <vbabka@...e.cz>, Kees Cook <kees@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>, Hyeonggon Yoo <42.hyeyoo@...il.com>
Subject: Re: [PATCH 3/4] perf lock contention: Resolve slab object name using BPF
On Tue, Nov 5, 2024 at 9:26 AM Namhyung Kim <namhyung@...nel.org> wrote:
>
> The bpf_get_kmem_cache() kfunc can return an address of the slab cache
> (kmem_cache). As it has the name of the slab cache from the iterator,
> we can use it to symbolize some dynamic kernel locks in a slab.
>
> Before:
> root@...tme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1
> contended total wait max wait avg wait address symbol
>
> 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex)
> 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex)
> 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex)
> 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex)
> 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex)
> 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock)
> 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex)
> 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex)
> 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex)
> 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex)
> 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex)
> 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex)
>
> After:
> root@...tme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1
> contended total wait max wait avg wait address symbol
>
> 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex)
> 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex)
> 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex)
> 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex)
> 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex)
> 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex)
> 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex)
> 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex)
> 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex)
> 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex)
>
> Note that the name starts with '&' sign for slab objects to inform they
> are dynamic locks. It won't give the accurate lock or type names but
> it's still useful. We may add type info to the slab cache later to get
> the exact name of the lock in the type.
Many variables may reference a lock through a pointer; should the name
not be associated with the lock, or derived by decoding the task_struct?
The '&' looks redundant as the addresses are clearly different.
How are >1 lock/mutex in the same struct handled?
Thanks,
Ian
> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> ---
> tools/perf/util/bpf_lock_contention.c | 52 +++++++++++++++++++
> .../perf/util/bpf_skel/lock_contention.bpf.c | 21 +++++++-
> 2 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> index a2efd40897bad316..50c3039c647d4d77 100644
> --- a/tools/perf/util/bpf_lock_contention.c
> +++ b/tools/perf/util/bpf_lock_contention.c
> @@ -2,6 +2,7 @@
> #include "util/cgroup.h"
> #include "util/debug.h"
> #include "util/evlist.h"
> +#include "util/hashmap.h"
> #include "util/machine.h"
> #include "util/map.h"
> #include "util/symbol.h"
> @@ -20,12 +21,25 @@
>
> static struct lock_contention_bpf *skel;
> static bool has_slab_iter;
> +static struct hashmap slab_hash;
> +
> +static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
> +{
> + return key;
> +}
> +
> +static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
> +{
> + return key1 == key2;
> +}
>
> static void check_slab_cache_iter(struct lock_contention *con)
> {
> struct btf *btf = btf__load_vmlinux_btf();
> s32 ret;
>
> + hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
> +
> ret = libbpf_get_error(btf);
> if (ret) {
> pr_debug("BTF loading failed: %d\n", ret);
> @@ -50,6 +64,7 @@ static void run_slab_cache_iter(void)
> {
> int fd;
> char buf[256];
> + long key, *prev_key;
>
> if (!has_slab_iter)
> return;
> @@ -65,6 +80,34 @@ static void run_slab_cache_iter(void)
> continue;
>
> close(fd);
> +
> + /* Read the slab cache map and build a hash with IDs */
> + fd = bpf_map__fd(skel->maps.slab_caches);
> + prev_key = NULL;
> + while (!bpf_map_get_next_key(fd, prev_key, &key)) {
> + struct slab_cache_data *data;
> +
> + data = malloc(sizeof(*data));
> + if (data == NULL)
> + break;
> +
> + if (bpf_map_lookup_elem(fd, &key, data) < 0)
> + break;
> +
> + hashmap__add(&slab_hash, data->id, data);
> + prev_key = &key;
> + }
> +}
> +
> +static void exit_slab_cache_iter(void)
> +{
> + struct hashmap_entry *cur;
> + unsigned bkt;
> +
> + hashmap__for_each_entry(&slab_hash, cur, bkt)
> + free(cur->pvalue);
> +
> + hashmap__clear(&slab_hash);
> }
>
> int lock_contention_prepare(struct lock_contention *con)
> @@ -398,6 +441,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
>
> if (con->aggr_mode == LOCK_AGGR_ADDR) {
> int lock_fd = bpf_map__fd(skel->maps.lock_syms);
> + struct slab_cache_data *slab_data;
>
> /* per-process locks set upper bits of the flags */
> if (flags & LCD_F_MMAP_LOCK)
> @@ -416,6 +460,12 @@ static const char *lock_contention_get_name(struct lock_contention *con,
> return "rq_lock";
> }
>
> + /* look slab_hash for dynamic locks in a slab object */
> + if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
> + snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
> + return name_buf;
> + }
> +
> return "";
> }
>
> @@ -590,5 +640,7 @@ int lock_contention_finish(struct lock_contention *con)
> cgroup__put(cgrp);
> }
>
> + exit_slab_cache_iter();
> +
> return 0;
> }
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index fd24ccb00faec0ba..b5bc37955560a58e 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -123,6 +123,8 @@ struct mm_struct___new {
> struct rw_semaphore mmap_lock;
> } __attribute__((preserve_access_index));
>
> +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
> +
> /* control flags */
> const volatile int has_cpu;
> const volatile int has_task;
> @@ -496,8 +498,23 @@ int contention_end(u64 *ctx)
> };
> int err;
>
> - if (aggr_mode == LOCK_AGGR_ADDR)
> - first.flags |= check_lock_type(pelem->lock, pelem->flags);
> + if (aggr_mode == LOCK_AGGR_ADDR) {
> + first.flags |= check_lock_type(pelem->lock,
> + pelem->flags & LCB_F_TYPE_MASK);
> +
> + /* Check if it's from a slab object */
> + if (bpf_get_kmem_cache) {
> + struct kmem_cache *s;
> + struct slab_cache_data *d;
> +
> + s = bpf_get_kmem_cache(pelem->lock);
> + if (s != NULL) {
> + d = bpf_map_lookup_elem(&slab_caches, &s);
> + if (d != NULL)
> + first.flags |= d->id;
> + }
> + }
> + }
>
> err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
> if (err < 0) {
> --
> 2.47.0.199.ga7371fff76-goog
>
Powered by blists - more mailing lists