[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <29e8f249-a23b-3c17-4000-a4075398b669@fb.com>
Date: Wed, 16 Dec 2020 10:18:16 -0800
From: Yonghong Song <yhs@...com>
To: Song Liu <songliubraving@...com>, <bpf@...r.kernel.org>,
<netdev@...r.kernel.org>
CC: <ast@...nel.org>, <daniel@...earbox.net>, <andrii@...nel.org>,
<john.fastabend@...il.com>, <kpsingh@...omium.org>,
<kernel-team@...com>
Subject: Re: [PATCH v2 bpf-next 4/4] selftests/bpf: add test for
bpf_iter_task_vma
On 12/15/20 3:37 PM, Song Liu wrote:
> The test dumps information similar to /proc/pid/maps. The first line of
> the output is compared against the /proc file to make sure they match.
>
> Signed-off-by: Song Liu <songliubraving@...com>
> ---
> .../selftests/bpf/prog_tests/bpf_iter.c | 106 ++++++++++++++++--
> tools/testing/selftests/bpf/progs/bpf_iter.h | 9 ++
> .../selftests/bpf/progs/bpf_iter_task_vma.c | 55 +++++++++
> 3 files changed, 160 insertions(+), 10 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 0e586368948dd..7afd3abae1899 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -7,6 +7,7 @@
> #include "bpf_iter_task.skel.h"
> #include "bpf_iter_task_stack.skel.h"
> #include "bpf_iter_task_file.skel.h"
> +#include "bpf_iter_task_vma.skel.h"
> #include "bpf_iter_task_btf.skel.h"
> #include "bpf_iter_tcp4.skel.h"
> #include "bpf_iter_tcp6.skel.h"
> @@ -64,6 +65,22 @@ static void do_dummy_read(struct bpf_program *prog)
> bpf_link__destroy(link);
> }
>
> +static int read_fd_into_buffer(int fd, char *buf, int size)
> +{
> + int bufleft = size;
> + int len;
> +
> + do {
> + len = read(fd, buf, bufleft);
> + if (len > 0) {
> + buf += len;
> + bufleft -= len;
> + }
> + } while (len > 0);
> +
> + return len;
> +}
> +
> static void test_ipv6_route(void)
> {
> struct bpf_iter_ipv6_route *skel;
> @@ -177,7 +194,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> {
> struct bpf_program *prog = skel->progs.dump_task_struct;
> struct bpf_iter_task_btf__bss *bss = skel->bss;
> - int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
> + int iter_fd = -1, err;
> struct bpf_link *link;
> char *buf = taskbuf;
> int ret = 0;
> @@ -190,14 +207,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
> goto free_link;
>
> - do {
> - len = read(iter_fd, buf, bufleft);
> - if (len > 0) {
> - buf += len;
> - bufleft -= len;
> - }
> - } while (len > 0);
> -
> + err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
> if (bss->skip) {
> printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
> ret = 1;
> @@ -205,7 +215,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> goto free_link;
> }
>
> - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
> + if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
> goto free_link;
>
> CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
> @@ -1133,6 +1143,80 @@ static void test_buf_neg_offset(void)
> bpf_iter_test_kern6__destroy(skel);
> }
>
> +#define CMP_BUFFER_SIZE 1024
> +char task_vma_output[CMP_BUFFER_SIZE];
> +char proc_maps_output[CMP_BUFFER_SIZE];
Should the above two be 'static' variables, to avoid potential conflicts
with other selftests?
> +
> +/* remove \0 and \t from str, and only keep the first line */
> +static void str_strip_first_line(char *str)
> +{
> + char *dst = str, *src = str;
> +
> + do {
> + if (*src == ' ' || *src == '\t')
> + src++;
> + else
> + *(dst++) = *(src++);
> +
> + } while (*src != '\0' && *src != '\n');
> +
> + *dst = '\0';
> +}
> +
> +static void test_task_vma(void)
> +{
> + int err, iter_fd = -1, proc_maps_fd = -1;
> + struct bpf_iter_task_vma *skel;
> + char maps_path[64];
> +
> + skel = bpf_iter_task_vma__open();
> + if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
> + return;
> +
> + skel->bss->pid = getpid();
> +
> + err = bpf_iter_task_vma__load(skel);
> + if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
> + goto out;
> +
> + do_dummy_read(skel->progs.proc_maps);
This do_dummy_read() is not needed, right?
> +
> + skel->links.proc_maps = bpf_program__attach_iter(
> + skel->progs.proc_maps, NULL);
> +
> + if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
> + "attach iterator failed\n"))
> + goto out;
> +
> + /* read 1kB from bpf_iter */
Maybe 1kB => CMP_BUFFER_SIZE (1kB)?
That way, if people change the CMP_BUFFER_SIZE value in the future, they can
find this comment and adjust it accordingly.
> + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
> + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
> + goto out;
> + err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
> + if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
> + goto out;
> +
> + /* read 1kB from /proc/pid/maps */
> + snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
> + proc_maps_fd = open(maps_path, O_RDONLY);
> + if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
> + goto out;
> + err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
> + if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
> + goto out;
> +
> + /* strip and compare the first line of the two files */
> + str_strip_first_line(task_vma_output);
> + str_strip_first_line(proc_maps_output);
> +
> + CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
> + "found mismatch\n");
> +out:
> + close(proc_maps_fd);
> + close(iter_fd);
> + bpf_iter_task_vma__destroy(skel);
> +}
> +
> void test_bpf_iter(void)
> {
> if (test__start_subtest("btf_id_or_null"))
> @@ -1149,6 +1233,8 @@ void test_bpf_iter(void)
> test_task_stack();
> if (test__start_subtest("task_file"))
> test_task_file();
> + if (test__start_subtest("task_vma"))
> + test_task_vma();
> if (test__start_subtest("task_btf"))
> test_task_btf();
> if (test__start_subtest("tcp4"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
> index 6a1255465fd6d..4dab0869c4fcb 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter.h
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
> @@ -7,6 +7,7 @@
> #define bpf_iter__netlink bpf_iter__netlink___not_used
> #define bpf_iter__task bpf_iter__task___not_used
> #define bpf_iter__task_file bpf_iter__task_file___not_used
> +#define bpf_iter__task_vma bpf_iter__task_vma___not_used
> #define bpf_iter__tcp bpf_iter__tcp___not_used
> #define tcp6_sock tcp6_sock___not_used
> #define bpf_iter__udp bpf_iter__udp___not_used
> @@ -26,6 +27,7 @@
> #undef bpf_iter__netlink
> #undef bpf_iter__task
> #undef bpf_iter__task_file
> +#undef bpf_iter__task_vma
> #undef bpf_iter__tcp
> #undef tcp6_sock
> #undef bpf_iter__udp
> @@ -67,6 +69,13 @@ struct bpf_iter__task_file {
> struct file *file;
> } __attribute__((preserve_access_index));
>
> +struct bpf_iter__task_vma {
> + struct bpf_iter_meta *meta;
> + struct task_struct *task;
> + struct __vm_area_struct *vma;
> + struct file *file;
> +} __attribute__((preserve_access_index));
Should we also define __vm_area_struct here since it is not available
in old kernels?
> +
> struct bpf_iter__bpf_map {
> struct bpf_iter_meta *meta;
> struct bpf_map *map;
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
> new file mode 100644
> index 0000000000000..ba87afe01024c
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
> @@ -0,0 +1,55 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2020 Facebook */
> +#include "bpf_iter.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +/* Copied from mm.h */
> +#define VM_READ 0x00000001
> +#define VM_WRITE 0x00000002
> +#define VM_EXEC 0x00000004
> +#define VM_MAYSHARE 0x00000080
> +
> +/* Copied from kdev_t.h */
> +#define MINORBITS 20
> +#define MINORMASK ((1U << MINORBITS) - 1)
> +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
> +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
> +
> +#define D_PATH_BUF_SIZE 1024
> +char d_path_buf[D_PATH_BUF_SIZE];
> +__u32 pid;
To please llvm10, maybe
char d_path_buf[D_PATH_BUF_SIZE] = {};
__u32 pid = 0;
> +
> +SEC("iter.s/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
> +{
> + struct __vm_area_struct *vma = ctx->vma;
> + struct seq_file *seq = ctx->meta->seq;
> + struct task_struct *task = ctx->task;
> + struct file *file = ctx->file;
> + char perm_str[] = "----";
> +
> + if (task == (void *)0 || vma == (void *)0 || task->pid != pid)
I suppose the kernel has already filtered out all non-group-leader tasks, so
here we can use task->tgid != pid?
> + return 0;
With the /proc filesystem, users typically do "cat /proc/pid/maps". How can we
have a similar user experience with vma_iter here? One way to do this
is:
- We still have this bpf program, filtering based on the user-specified pid,
- the normal bpftool iter pin command pins the program to, say,
/sys/fs/bpf/task_vma
- since "pid" is in a map, the user can use bpftool to update "pid"
with the target pid.
- "cat /sys/fs/bpf/task_vma" will work.
One issue here is that pid and d_path_buf are global (map) variables, so
if two users try to do "cat /sys/fs/bpf/task_vma" at the same
time, they will interfere with each other and it will not work.
One possible way is to duplicate all program maps during BPF_ITER_CREATE.
But this is unnecessary, as in most cases the bpf_iter is not
pinned and is private to the application.
Any other ideas?
> +
> + perm_str[0] = (vma->flags & VM_READ) ? 'r' : '-';
> + perm_str[1] = (vma->flags & VM_WRITE) ? 'w' : '-';
> + perm_str[2] = (vma->flags & VM_EXEC) ? 'x' : '-';
> + perm_str[3] = (vma->flags & VM_MAYSHARE) ? 's' : 'p';
> + BPF_SEQ_PRINTF(seq, "%08llx-%08llx %s ", vma->start, vma->end, perm_str);
> +
> + if (file) {
> + __u32 dev = file->f_inode->i_sb->s_dev;
> +
> + bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE);
> +
> + BPF_SEQ_PRINTF(seq, "%08llx ", vma->pgoff << 12);
> + BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev),
> + file->f_inode->i_ino);
> + BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf);
> + } else {
> + BPF_SEQ_PRINTF(seq, "%08llx 00:00 0\n", 0ULL);
> + }
> + return 0;
> +}
>
Powered by blists - more mailing lists