lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAB=+i9Sm4UEhGy-jzsZEs1Q6KQCVdbnu_eAiRzXz=sRC-3H6Uw@mail.gmail.com>
Date: Sun, 29 Sep 2024 23:27:25 +0900
From: Hyeonggon Yoo <42.hyeyoo@...il.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>, 
	Andrii Nakryiko <andrii@...nel.org>, Martin KaFai Lau <martin.lau@...ux.dev>, 
	Eduard Zingerman <eddyz87@...il.com>, Song Liu <song@...nel.org>, 
	Yonghong Song <yonghong.song@...ux.dev>, John Fastabend <john.fastabend@...il.com>, 
	KP Singh <kpsingh@...nel.org>, Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>, 
	Jiri Olsa <jolsa@...nel.org>, LKML <linux-kernel@...r.kernel.org>, bpf@...r.kernel.org, 
	Andrew Morton <akpm@...ux-foundation.org>, Christoph Lameter <cl@...ux.com>, 
	Pekka Enberg <penberg@...nel.org>, David Rientjes <rientjes@...gle.com>, 
	Joonsoo Kim <iamjoonsoo.kim@....com>, Vlastimil Babka <vbabka@...e.cz>, 
	Roman Gushchin <roman.gushchin@...ux.dev>, linux-mm@...ck.org, 
	Arnaldo Carvalho de Melo <acme@...nel.org>
Subject: Re: [RFC/PATCH bpf-next 3/3] selftests/bpf: Add a test for kmem_cache_iter

On Sun, Sep 29, 2024 at 3:13 PM Namhyung Kim <namhyung@...nel.org> wrote:
>
> On Fri, Sep 27, 2024 at 11:41:33AM -0700, Namhyung Kim wrote:
> > The test traverses all slab caches using the kmem_cache_iter and check
> > if current task's pointer is from "task_struct" slab cache.
> >
> > Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> > ---
> >  .../bpf/prog_tests/kmem_cache_iter.c          | 64 ++++++++++++++++++
> >  tools/testing/selftests/bpf/progs/bpf_iter.h  |  7 ++
> >  .../selftests/bpf/progs/kmem_cache_iter.c     | 66 +++++++++++++++++++
> >  3 files changed, 137 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/kmem_cache_iter.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
> > new file mode 100644
> > index 0000000000000000..814bcc453e9f3ccd
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
> > @@ -0,0 +1,64 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2024 Google */
> > +
> > +#include <test_progs.h>
> > +#include <bpf/libbpf.h>
> > +#include <bpf/btf.h>
> > +#include "kmem_cache_iter.skel.h"
> > +
> > +static void test_kmem_cache_iter_check_task(struct kmem_cache_iter *skel)
> > +{
> > +     LIBBPF_OPTS(bpf_test_run_opts, opts,
> > +             .flags = BPF_F_TEST_RUN_ON_CPU,
> > +     );
> > +     int prog_fd = bpf_program__fd(skel->progs.check_task_struct);
> > +
> > +     /* get task_struct and check it if's from a slab cache */
> > +     bpf_prog_test_run_opts(prog_fd, &opts);
> > +
> > +     /* the BPF program should set 'found' variable */
> > +     ASSERT_EQ(skel->bss->found, 1, "found task_struct");
>
> Hmm.. I'm seeing a failure with found being -1, which means ...
>
> > +}
> > +
> > +void test_kmem_cache_iter(void)
> > +{
> > +     DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> > +     struct kmem_cache_iter *skel = NULL;
> > +     union bpf_iter_link_info linfo = {};
> > +     struct bpf_link *link;
> > +     char buf[1024];
> > +     int iter_fd;
> > +
> > +     skel = kmem_cache_iter__open_and_load();
> > +     if (!ASSERT_OK_PTR(skel, "kmem_cache_iter__open_and_load"))
> > +             return;
> > +
> > +     opts.link_info = &linfo;
> > +     opts.link_info_len = sizeof(linfo);
> > +
> > +     link = bpf_program__attach_iter(skel->progs.slab_info_collector, &opts);
> > +     if (!ASSERT_OK_PTR(link, "attach_iter"))
> > +             goto destroy;
> > +
> > +     iter_fd = bpf_iter_create(bpf_link__fd(link));
> > +     if (!ASSERT_GE(iter_fd, 0, "iter_create"))
> > +             goto free_link;
> > +
> > +     memset(buf, 0, sizeof(buf));
> > +     while (read(iter_fd, buf, sizeof(buf) > 0)) {
> > +             /* read out all contents */
> > +             printf("%s", buf);
> > +     }
> > +
> > +     /* next reads should return 0 */
> > +     ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
> > +
> > +     test_kmem_cache_iter_check_task(skel);
> > +
> > +     close(iter_fd);
> > +
> > +free_link:
> > +     bpf_link__destroy(link);
> > +destroy:
> > +     kmem_cache_iter__destroy(skel);
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
> > index c41ee80533ca219a..3305dc3a74b32481 100644
> > --- a/tools/testing/selftests/bpf/progs/bpf_iter.h
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
> > @@ -24,6 +24,7 @@
> >  #define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
> >  #define BTF_F_ZERO BTF_F_ZERO___not_used
> >  #define bpf_iter__ksym bpf_iter__ksym___not_used
> > +#define bpf_iter__kmem_cache bpf_iter__kmem_cache___not_used
> >  #include "vmlinux.h"
> >  #undef bpf_iter_meta
> >  #undef bpf_iter__bpf_map
> > @@ -48,6 +49,7 @@
> >  #undef BTF_F_PTR_RAW
> >  #undef BTF_F_ZERO
> >  #undef bpf_iter__ksym
> > +#undef bpf_iter__kmem_cache
> >
> >  struct bpf_iter_meta {
> >       struct seq_file *seq;
> > @@ -165,3 +167,8 @@ struct bpf_iter__ksym {
> >       struct bpf_iter_meta *meta;
> >       struct kallsym_iter *ksym;
> >  };
> > +
> > +struct bpf_iter__kmem_cache {
> > +     struct bpf_iter_meta *meta;
> > +     struct kmem_cache *s;
> > +} __attribute__((preserve_access_index));
> > diff --git a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c
> > new file mode 100644
> > index 0000000000000000..3f6ec15a1bf6344c
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c
> > @@ -0,0 +1,66 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright (c) 2024 Google */
> > +
> > +#include "bpf_iter.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_tracing.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define SLAB_NAME_MAX  256
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_HASH);
> > +     __uint(key_size, sizeof(void *));
> > +     __uint(value_size, SLAB_NAME_MAX);
> > +     __uint(max_entries, 1024);
> > +} slab_hash SEC(".maps");
> > +
> > +extern struct kmem_cache *bpf_get_kmem_cache(__u64 addr) __ksym;
> > +
> > +/* result, will be checked by userspace */
> > +int found;
> > +
> > +SEC("iter/kmem_cache")
> > +int slab_info_collector(struct bpf_iter__kmem_cache *ctx)
> > +{
> > +     struct seq_file *seq = ctx->meta->seq;
> > +     struct kmem_cache *s = ctx->s;
> > +
> > +     if (s) {
> > +             char name[SLAB_NAME_MAX];
> > +
> > +             /*
> > +              * To make sure if the slab_iter implements the seq interface
> > +              * properly and it's also useful for debugging.
> > +              */
> > +             BPF_SEQ_PRINTF(seq, "%s: %u\n", s->name, s->object_size);
> > +
> > +             bpf_probe_read_kernel_str(name, sizeof(name), s->name);
> > +             bpf_map_update_elem(&slab_hash, &s, name, BPF_NOEXIST);
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +SEC("raw_tp/bpf_test_finish")
> > +int BPF_PROG(check_task_struct)
> > +{
> > +     __u64 curr = bpf_get_current_task();
> > +     struct kmem_cache *s;
> > +     char *name;
> > +
> > +     s = bpf_get_kmem_cache(curr);
> > +     if (s == NULL) {
> > +             found = -1;
> > +             return 0;
>
> ... it cannot find a kmem_cache for the current task.  This program is
> run by bpf_prog_test_run_opts() with BPF_F_TEST_RUN_ON_CPU.  So I think
> the curr should point a task_struct in a slab cache.
>
> Am I missing something?

Hi Namhyung,

Out of curiosity I've been investigating this issue on my machine and
running some experiments.

When the test fails, calling dump_page() for the page the task_struct
belongs to,
shows that the page does not have the PGTY_slab flag set which is why
virt_to_slab(current) returns NULL.

Does the test always fails on your environment? On my machine, the
test passed sometimes but failed some times.

Maybe sometimes the value returned by 'current' macro belongs to a
slab, but sometimes it does not.
But that doesn't really make sense to me as IIUC task_struct
descriptors are allocated from slab.

....Or maybe some code can overwrote the page_type field of a slab?
Hmm, it seems we need more information to identify what's gone wrong.

Just FYI, adding the output of the following code snippet in
bpf_get_kmem_cache():

pr_info("current = %llx\n", (unsigned long long)current);
dump_page(virt_to_head_page(current), "virt_to_head_page()");

# When the test passes
[  232.755028] current = ffff8ff5b9ebd200
[  232.755031] page: refcount:1 mapcount:0 mapping:0000000000000000
index:0x0 pfn:0x139eb8
[  232.755033] head: order:3 mapcount:0 entire_mapcount:0
nr_pages_mapped:0 pincount:0
[  232.755035] memcg:ffff8ff5b3ee0c01
[  232.755037] ksm flags:
0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff)
[  232.755040] page_type: f5(slab)
[  232.755042] raw: 0017ffffc0000040 ffff8ff58028ab00 ffffdaba05b8fc00
dead000000000003
[  232.755045] raw: 0000000000000000 0000000000030003 00000001f5000000
ffff8ff5b3ee0c01
[  232.755047] head: 0017ffffc0000040 ffff8ff58028ab00
ffffdaba05b8fc00 dead000000000003
[  232.755048] head: 0000000000000000 0000000000030003
00000001f5000000 ffff8ff5b3ee0c01
[  232.755050] head: 0017ffffc0000003 ffffdaba04e7ae01
ffffffffffffffff 0000000000000000
[  232.755052] head: 0000000000000008 0000000000000000
00000000ffffffff 0000000000000000
[  232.755053] page dumped because: virt_to_head_page()

# When the test fails
[  130.811626] current = ffffffff884110c0
[  130.811628] page: refcount:1 mapcount:0 mapping:0000000000000000
index:0x0 pfn:0x8a9411
[  130.811632] flags:
0x17ffffc0002000(reserved|node=0|zone=2|lastcpupid=0x1fffff)
[  130.811636] raw: 0017ffffc0002000 ffffdaba22a50448 ffffdaba22a50448
0000000000000000
[  130.811639] raw: 0000000000000000 0000000000000000 00000001ffffffff
0000000000000000
[  130.811641] page dumped because: virt_to_head_page()

Best,
Hyeonggon

>
> Thanks,
> Namhyung
>
> > +     }
> > +
> > +     name = bpf_map_lookup_elem(&slab_hash, &s);
> > +     if (name && !bpf_strncmp(name, 11, "task_struct"))
> > +             found = 1;
> > +     else
> > +             found = -2;
> > +
> > +     return 0;
> > +}
> > --
> > 2.46.1.824.gd892dcdcdd-goog
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ