lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fV3JAJD_beASNdP_Sxh=oAFv-yh_cqW=P=Bc9FabddUpQ@mail.gmail.com>
Date: Tue, 5 Nov 2024 09:41:05 -0800
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>, Kan Liang <kan.liang@...ux.intel.com>, 
	Jiri Olsa <jolsa@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>, 
	Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>, 
	LKML <linux-kernel@...r.kernel.org>, linux-perf-users@...r.kernel.org, 
	Song Liu <song@...nel.org>, bpf@...r.kernel.org, 
	Stephane Eranian <eranian@...gle.com>, Vlastimil Babka <vbabka@...e.cz>, Kees Cook <kees@...nel.org>, 
	Roman Gushchin <roman.gushchin@...ux.dev>, Hyeonggon Yoo <42.hyeyoo@...il.com>
Subject: Re: [PATCH 3/4] perf lock contention: Resolve slab object name using BPF

On Tue, Nov 5, 2024 at 9:26 AM Namhyung Kim <namhyung@...nel.org> wrote:
>
> The bpf_get_kmem_cache() kfunc can return an address of the slab cache
> (kmem_cache).  As it has the name of the slab cache from the iterator,
> we can use it to symbolize some dynamic kernel locks in a slab.
>
> Before:
>   root@...tme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1
>    contended   total wait     max wait     avg wait            address   symbol
>
>            2      3.34 us      2.87 us      1.67 us   ffff9d7800ad9600    (mutex)
>            2      2.16 us      1.93 us      1.08 us   ffff9d7804b992d8    (mutex)
>            4      1.37 us       517 ns       343 ns   ffff9d78036e6e00    (mutex)
>            1      1.27 us      1.27 us      1.27 us   ffff9d7804b99378    (mutex)
>            2       845 ns       599 ns       422 ns   ffffffff9e1c3620   delayed_uprobe_lock (mutex)
>            1       845 ns       845 ns       845 ns   ffffffff9da0b280   jiffies_lock (spinlock)
>            2       377 ns       259 ns       188 ns   ffffffff9e1cf840   pcpu_alloc_mutex (mutex)
>            1       305 ns       305 ns       305 ns   ffffffff9e1b4cf8   tracepoint_srcu_srcu_usage (mutex)
>            1       295 ns       295 ns       295 ns   ffffffff9e1c0940   pack_mutex (mutex)
>            1       232 ns       232 ns       232 ns   ffff9d7804b7d8d8    (mutex)
>            1       180 ns       180 ns       180 ns   ffffffff9e1b4c28   tracepoint_srcu_srcu_usage (mutex)
>            1       165 ns       165 ns       165 ns   ffffffff9da8b3a0   text_mutex (mutex)
>
> After:
>   root@...tme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1
>    contended   total wait     max wait     avg wait            address   symbol
>
>            2      1.95 us      1.77 us       975 ns   ffff9d5e852d3498   &task_struct (mutex)
>            1      1.18 us      1.18 us      1.18 us   ffff9d5e852d3538   &task_struct (mutex)
>            4      1.12 us       354 ns       279 ns   ffff9d5e841ca800   &kmalloc-cg-512 (mutex)
>            2       859 ns       617 ns       429 ns   ffffffffa41c3620   delayed_uprobe_lock (mutex)
>            3       691 ns       388 ns       230 ns   ffffffffa41c0940   pack_mutex (mutex)
>            3       421 ns       164 ns       140 ns   ffffffffa3a8b3a0   text_mutex (mutex)
>            1       409 ns       409 ns       409 ns   ffffffffa41b4cf8   tracepoint_srcu_srcu_usage (mutex)
>            2       362 ns       239 ns       181 ns   ffffffffa41cf840   pcpu_alloc_mutex (mutex)
>            1       220 ns       220 ns       220 ns   ffff9d5e82b534d8   &signal_cache (mutex)
>            1       215 ns       215 ns       215 ns   ffffffffa41b4c28   tracepoint_srcu_srcu_usage (mutex)
>
> Note that the name starts with a '&' sign for slab objects to indicate
> that they are dynamic locks.  It won't give the accurate lock or type
> names but it's still useful.  We may add type info to the slab cache
> later to get the exact name of the lock in the type.

Many variables may reference a lock through a pointer; shouldn't the name
be associated with the lock itself, or derived by decoding the task_struct?
The '&' looks redundant, as the addresses are clearly different.
How are >1 lock/mutex in the same struct handled?

Thanks,
Ian

> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> ---
>  tools/perf/util/bpf_lock_contention.c         | 52 +++++++++++++++++++
>  .../perf/util/bpf_skel/lock_contention.bpf.c  | 21 +++++++-
>  2 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> index a2efd40897bad316..50c3039c647d4d77 100644
> --- a/tools/perf/util/bpf_lock_contention.c
> +++ b/tools/perf/util/bpf_lock_contention.c
> @@ -2,6 +2,7 @@
>  #include "util/cgroup.h"
>  #include "util/debug.h"
>  #include "util/evlist.h"
> +#include "util/hashmap.h"
>  #include "util/machine.h"
>  #include "util/map.h"
>  #include "util/symbol.h"
> @@ -20,12 +21,25 @@
>
>  static struct lock_contention_bpf *skel;
>  static bool has_slab_iter;
> +static struct hashmap slab_hash;
> +
> +static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
> +{
> +       return key;
> +}
> +
> +static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
> +{
> +       return key1 == key2;
> +}
>
>  static void check_slab_cache_iter(struct lock_contention *con)
>  {
>         struct btf *btf = btf__load_vmlinux_btf();
>         s32 ret;
>
> +       hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
> +
>         ret = libbpf_get_error(btf);
>         if (ret) {
>                 pr_debug("BTF loading failed: %d\n", ret);
> @@ -50,6 +64,7 @@ static void run_slab_cache_iter(void)
>  {
>         int fd;
>         char buf[256];
> +       long key, *prev_key;
>
>         if (!has_slab_iter)
>                 return;
> @@ -65,6 +80,34 @@ static void run_slab_cache_iter(void)
>                 continue;
>
>         close(fd);
> +
> +       /* Read the slab cache map and build a hash with IDs */
> +       fd = bpf_map__fd(skel->maps.slab_caches);
> +       prev_key = NULL;
> +       while (!bpf_map_get_next_key(fd, prev_key, &key)) {
> +               struct slab_cache_data *data;
> +
> +               data = malloc(sizeof(*data));
> +               if (data == NULL)
> +                       break;
> +
> +               if (bpf_map_lookup_elem(fd, &key, data) < 0)
> +                       break;
> +
> +               hashmap__add(&slab_hash, data->id, data);
> +               prev_key = &key;
> +       }
> +}
> +
> +static void exit_slab_cache_iter(void)
> +{
> +       struct hashmap_entry *cur;
> +       unsigned bkt;
> +
> +       hashmap__for_each_entry(&slab_hash, cur, bkt)
> +               free(cur->pvalue);
> +
> +       hashmap__clear(&slab_hash);
>  }
>
>  int lock_contention_prepare(struct lock_contention *con)
> @@ -398,6 +441,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
>
>         if (con->aggr_mode == LOCK_AGGR_ADDR) {
>                 int lock_fd = bpf_map__fd(skel->maps.lock_syms);
> +               struct slab_cache_data *slab_data;
>
>                 /* per-process locks set upper bits of the flags */
>                 if (flags & LCD_F_MMAP_LOCK)
> @@ -416,6 +460,12 @@ static const char *lock_contention_get_name(struct lock_contention *con,
>                                 return "rq_lock";
>                 }
>
> +               /* look slab_hash for dynamic locks in a slab object */
> +               if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
> +                       snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
> +                       return name_buf;
> +               }
> +
>                 return "";
>         }
>
> @@ -590,5 +640,7 @@ int lock_contention_finish(struct lock_contention *con)
>                 cgroup__put(cgrp);
>         }
>
> +       exit_slab_cache_iter();
> +
>         return 0;
>  }
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index fd24ccb00faec0ba..b5bc37955560a58e 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -123,6 +123,8 @@ struct mm_struct___new {
>         struct rw_semaphore mmap_lock;
>  } __attribute__((preserve_access_index));
>
> +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
> +
>  /* control flags */
>  const volatile int has_cpu;
>  const volatile int has_task;
> @@ -496,8 +498,23 @@ int contention_end(u64 *ctx)
>                 };
>                 int err;
>
> -               if (aggr_mode == LOCK_AGGR_ADDR)
> -                       first.flags |= check_lock_type(pelem->lock, pelem->flags);
> +               if (aggr_mode == LOCK_AGGR_ADDR) {
> +                       first.flags |= check_lock_type(pelem->lock,
> +                                                      pelem->flags & LCB_F_TYPE_MASK);
> +
> +                       /* Check if it's from a slab object */
> +                       if (bpf_get_kmem_cache) {
> +                               struct kmem_cache *s;
> +                               struct slab_cache_data *d;
> +
> +                               s = bpf_get_kmem_cache(pelem->lock);
> +                               if (s != NULL) {
> +                                       d = bpf_map_lookup_elem(&slab_caches, &s);
> +                                       if (d != NULL)
> +                                               first.flags |= d->id;
> +                               }
> +                       }
> +               }
>
>                 err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
>                 if (err < 0) {
> --
> 2.47.0.199.ga7371fff76-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ