[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOQ4uxjDudLwKuxXaFihdYJUv2yvwkETouP9zJtJ6bRSpmV-Kw@mail.gmail.com>
Date: Thu, 7 Nov 2024 12:10:50 +0100
From: Amir Goldstein <amir73il@...il.com>
To: Song Liu <song@...nel.org>
Cc: bpf@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, kernel-team@...a.com, andrii@...nel.org,
eddyz87@...il.com, ast@...nel.org, daniel@...earbox.net, martin.lau@...ux.dev,
viro@...iv.linux.org.uk, brauner@...nel.org, jack@...e.cz, kpsingh@...nel.org,
mattbobrowski@...gle.com, repnop@...gle.com, jlayton@...nel.org,
josef@...icpanda.com
Subject: Re: [RFC bpf-next fanotify 5/5] selftests/bpf: Add test for BPF based
fanotify fastpath handler
On Wed, Oct 30, 2024 at 12:13 AM Song Liu <song@...nel.org> wrote:
>
> This test shows a simplified logic that monitors a subtree. This is
> simplified as it doesn't handle all the scenarios, such as:
>
> 1) moving a sub-subtree into/out of the subtree being monitored;
There is a solution for that (see below)
> 2) a mount point inside the subtree being monitored
For that we will need to add the MOUNT/UNMOUNT/MOVE_MOUNT events,
but those have been requested by userspace anyway.
>
> Therefore, this is not to show a way to reliably monitor a subtree.
> Instead, this is to test the functionalities of bpf based fastpath.
> To really monitor a subtree reliably, we will need more complex logic.
Actually, this example is the foundation of my vision for efficient and
race-free subtree filtering:
1. The inode map is to be treated as a cache for the is_subdir() query
2. Cache entries can also have a "distance from root" (i.e. depth) value
3. Each unknown queried path can call is_subdir() and populate the cache
entries for all ancestors
4. The cache/map size should be limited and when limit is reached,
evicting entries by depth priority makes sense
5. A rename event for a directory whose inode is in the map and whose
new parent is not in the map or has a different value than old parent
needs to invalidate the entire map
6. fast_path also needs a hook from inode evict to clear cache entries
This is similar to, but more efficient and race-free than, what can already
be achieved in userspace using FAN_MARK_EVICTABLE.
Food for thought.
Thanks,
Amir.
>
> Overview of the logic:
> 1. fanotify is created for the whole file system (/tmp);
> 2. A bpf map (inode_storage_map) is used to tag directories to
> monitor (starting from /tmp/fanotify_test);
> 3. On fsnotify_mkdir, the tag is propagated to newly created
> subdirectories (/tmp/fanotify_test/subdir);
> 4. The bpf fastpath checks whether the fanotify event happens in a
> directory with the tag. If yes, the event is sent to user space;
> otherwise, the event is dropped.
>
> Signed-off-by: Song Liu <song@...nel.org>
> ---
> tools/testing/selftests/bpf/bpf_kfuncs.h | 4 +
> tools/testing/selftests/bpf/config | 1 +
> .../testing/selftests/bpf/prog_tests/fan_fp.c | 245 ++++++++++++++++++
> tools/testing/selftests/bpf/progs/fan_fp.c | 77 ++++++
> 4 files changed, 327 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/fan_fp.c
> create mode 100644 tools/testing/selftests/bpf/progs/fan_fp.c
>
> diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
> index 2eb3483f2fb0..44dcf4991244 100644
> --- a/tools/testing/selftests/bpf/bpf_kfuncs.h
> +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
> @@ -87,4 +87,8 @@ struct dentry;
> */
> extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
> struct bpf_dynptr *value_ptr) __ksym __weak;
> +
> +struct fanotify_fastpath_event;
> +extern struct inode *bpf_fanotify_data_inode(struct fanotify_fastpath_event *event) __ksym __weak;
> +extern void bpf_iput(struct inode *inode) __ksym __weak;
> #endif
> diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
> index 4ca84c8d9116..392cbcad8a8b 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -24,6 +24,7 @@ CONFIG_DEBUG_INFO_BTF=y
> CONFIG_DEBUG_INFO_DWARF4=y
> CONFIG_DUMMY=y
> CONFIG_DYNAMIC_FTRACE=y
> +CONFIG_FANOTIFY=y
> CONFIG_FPROBE=y
> CONFIG_FTRACE_SYSCALLS=y
> CONFIG_FUNCTION_ERROR_INJECTION=y
> diff --git a/tools/testing/selftests/bpf/prog_tests/fan_fp.c b/tools/testing/selftests/bpf/prog_tests/fan_fp.c
> new file mode 100644
> index 000000000000..ea57ed366647
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/fan_fp.c
> @@ -0,0 +1,245 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +
> +#define _GNU_SOURCE
> +#include <err.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <sys/fanotify.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +
> +#include <test_progs.h>
> +
> +#include "fan_fp.skel.h"
> +
> +#define TEST_FS "/tmp/"
> +#define TEST_DIR "/tmp/fanotify_test/"
> +
> +static int create_test_subtree(void)
> +{
> + int err;
> +
> + err = mkdir(TEST_DIR, 0777);
> + if (err && errno != EEXIST)
> + return err;
> +
> + return open(TEST_DIR, O_RDONLY);
> +}
> +
> +static int create_fanotify_fd(void)
> +{
> + int fanotify_fd, err;
> +
> + fanotify_fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_NAME | FAN_REPORT_DIR_FID,
> + O_RDONLY);
> +
> + if (!ASSERT_OK_FD(fanotify_fd, "fanotify_init"))
> + return -1;
> +
> + err = fanotify_mark(fanotify_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
> + FAN_CREATE | FAN_OPEN | FAN_ONDIR | FAN_EVENT_ON_CHILD,
> + AT_FDCWD, TEST_FS);
> + if (!ASSERT_OK(err, "fanotify_mark")) {
> + close(fanotify_fd);
> + return -1;
> + }
> +
> + return fanotify_fd;
> +}
> +
> +static int attach_global_fastpath(int fanotify_fd)
> +{
> + struct fanotify_fastpath_args args = {
> + .name = "_tmp_test_sub_tree",
> + .version = 1,
> + .flags = 0,
> + };
> +
> + if (ioctl(fanotify_fd, FAN_IOC_ADD_FP, &args))
> + return -1;
> +
> + return 0;
> +}
> +
> +#define EVENT_BUFFER_SIZE 4096
> +struct file_access_result {
> + char name_prefix[16];
> + bool accessed;
> +} access_results[3] = {
> + {"aa", false},
> + {"bb", false},
> + {"cc", false},
> +};
> +
> +static void update_access_results(char *name)
> +{
> + int i;
> +
> + for (i = 0; i < 3; i++) {
> + if (strstr(name, access_results[i].name_prefix))
> + access_results[i].accessed = true;
> + }
> +}
> +
> +static void parse_event(char *buffer, int len)
> +{
> + struct fanotify_event_metadata *event =
> + (struct fanotify_event_metadata *) buffer;
> + struct fanotify_event_info_header *info;
> + struct fanotify_event_info_fid *fid;
> + struct file_handle *handle;
> + char *name;
> + int off;
> +
> + for (; FAN_EVENT_OK(event, len); event = FAN_EVENT_NEXT(event, len)) {
> + for (off = sizeof(*event) ; off < event->event_len;
> + off += info->len) {
> + info = (struct fanotify_event_info_header *)
> + ((char *) event + off);
> + switch (info->info_type) {
> + case FAN_EVENT_INFO_TYPE_DFID_NAME:
> + fid = (struct fanotify_event_info_fid *) info;
> + handle = (struct file_handle *)&fid->handle;
> + name = (char *)handle + sizeof(*handle) + handle->handle_bytes;
> + update_access_results(name);
> + break;
> + default:
> + break;
> + }
> + }
> + }
> +}
> +
> +static void touch_file(const char *path)
> +{
> + int fd;
> +
> + fd = open(path, O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666);
> + if (!ASSERT_OK_FD(fd, "open"))
> + goto cleanup;
> + close(fd);
> +cleanup:
> + unlink(path);
> +}
> +
> +static void generate_and_test_event(int fanotify_fd)
> +{
> + char buffer[EVENT_BUFFER_SIZE];
> + int len, err;
> +
> + /* access /tmp/fanotify_test/aa, this will generate event */
> + touch_file(TEST_DIR "aa");
> +
> + /* create /tmp/fanotify_test/subdir, this will get tag from the
> + * parent directory (added in the bpf program on fsnotify_mkdir)
> + */
> + err = mkdir(TEST_DIR "subdir", 0777);
> + ASSERT_OK(err, "mkdir");
> +
> + /* access /tmp/fanotify_test/subdir/bb, this will generate event */
> + touch_file(TEST_DIR "subdir/bb");
> +
> + /* access /tmp/cc, this will NOT generate event, as the BPF
> + * fastpath filtered this event out. (Because /tmp doesn't have
> + * the tag.)
> + */
> + touch_file(TEST_FS "cc");
> +
> + /* read and parse the events */
> + len = read(fanotify_fd, buffer, EVENT_BUFFER_SIZE);
> + if (!ASSERT_GE(len, 0, "read event"))
> + goto cleanup;
> + parse_event(buffer, len);
> +
> + /* verify we generated events for aa and bb, but filtered out the
> + * event for cc.
> + */
> + ASSERT_TRUE(access_results[0].accessed, "access aa");
> + ASSERT_TRUE(access_results[1].accessed, "access bb");
> + ASSERT_FALSE(access_results[2].accessed, "access cc");
> +
> +cleanup:
> + rmdir(TEST_DIR "subdir");
> + rmdir(TEST_DIR);
> +}
> +
> +/* This test shows a simplified logic that monitors a subtree. This is
> + * simplified as it doesn't handle all the scenarios, such as:
> + *
> + * 1) moving a sub-subtree into/out of the subtree being monitored;
> + * 2) a mount point inside the subtree being monitored
> + *
> + * Therefore, this is not to show a way to reliably monitor a subtree.
> + * Instead, this is to test the functionalities of bpf based fastpath.
> + *
> + * Overview of the logic:
> + * 1. fanotify is created for the whole file system (/tmp);
> + * 2. A bpf map (inode_storage_map) is used to tag directories to
> + * monitor (starting from /tmp/fanotify_test);
> + * 3. On fsnotify_mkdir, the tag is propagated to newly created
> + * subdirectories (/tmp/fanotify_test/subdir);
> + * 4. The bpf fastpath checks whether the event happens in a directory
> + * with the tag. If yes, the event is sent to user space; otherwise,
> + * the event is dropped.
> + */
> +static void test_monitor_subtree(void)
> +{
> + struct bpf_link *link;
> + struct fan_fp *skel;
> + int test_root_fd;
> + __u32 one = 1;
> + int err, fanotify_fd;
> +
> + test_root_fd = create_test_subtree();
> +
> + if (!ASSERT_OK_FD(test_root_fd, "create_test_subtree"))
> + return;
> +
> + skel = fan_fp__open_and_load();
> +
> + if (!ASSERT_OK_PTR(skel, "fan_fp__open_and_load"))
> + goto close_test_root_fd;
> +
> + /* Add tag to /tmp/fanotify_test/ */
> + err = bpf_map_update_elem(bpf_map__fd(skel->maps.inode_storage_map),
> + &test_root_fd, &one, BPF_ANY);
> + if (!ASSERT_OK(err, "bpf_map_update_elem"))
> + goto destroy_skel;
> + link = bpf_map__attach_struct_ops(skel->maps.bpf_fanotify_fastpath_ops);
> + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
> + goto destroy_skel;
> +
> +
> + fanotify_fd = create_fanotify_fd();
> + if (!ASSERT_OK_FD(fanotify_fd, "create_fanotify_fd"))
> + goto destroy_link;
> +
> + err = attach_global_fastpath(fanotify_fd);
> + if (!ASSERT_OK(err, "attach_global_fastpath"))
> + goto close_fanotify_fd;
> +
> + generate_and_test_event(fanotify_fd);
> +
> + ASSERT_EQ(skel->bss->added_inode_storage, 1, "added_inode_storage");
> +
> +close_fanotify_fd:
> + close(fanotify_fd);
> +
> +destroy_link:
> + bpf_link__destroy(link);
> +destroy_skel:
> + fan_fp__destroy(skel);
> +
> +close_test_root_fd:
> + close(test_root_fd);
> + rmdir(TEST_DIR);
> +}
> +
> +void test_bpf_fanotify_fastpath(void)
> +{
> + if (test__start_subtest("subtree"))
> + test_monitor_subtree();
> +}
> diff --git a/tools/testing/selftests/bpf/progs/fan_fp.c b/tools/testing/selftests/bpf/progs/fan_fp.c
> new file mode 100644
> index 000000000000..ee86dc189e38
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/fan_fp.c
> @@ -0,0 +1,77 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Facebook */
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_tracing.h>
> +#include <bpf/bpf_core_read.h>
> +
> +#define FS_CREATE 0x00000100 /* Subfile was created */
> +#define FS_ISDIR 0x40000000 /* event occurred against dir */
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
> + __uint(map_flags, BPF_F_NO_PREALLOC);
> + __type(key, int);
> + __type(value, __u32);
> +} inode_storage_map SEC(".maps");
> +
> +int added_inode_storage;
> +
> +SEC("struct_ops")
> +int BPF_PROG(bpf_fp_handler,
> + struct fsnotify_group *group,
> + struct fanotify_fastpath_hook *fp_hook,
> + struct fanotify_fastpath_event *fp_event)
> +{
> + struct inode *dir;
> + __u32 *value;
> +
> + dir = fp_event->dir;
> +
> + value = bpf_inode_storage_get(&inode_storage_map, dir, 0, 0);
> +
> + /* if dir doesn't have the tag, skip the event */
> + if (!value)
> + return FAN_FP_RET_SKIP_EVENT;
> +
> + /* propagate tag to subdir on fsnotify_mkdir */
> + if (fp_event->mask == (FS_CREATE | FS_ISDIR) &&
> + fp_event->data_type == FSNOTIFY_EVENT_DENTRY) {
> + struct inode *new_inode;
> +
> + new_inode = bpf_fanotify_data_inode(fp_event);
> + if (!new_inode)
> + goto out;
> +
> + value = bpf_inode_storage_get(&inode_storage_map, new_inode, 0,
> + BPF_LOCAL_STORAGE_GET_F_CREATE);
> + if (value) {
> + *value = 1;
> + added_inode_storage++;
> + }
> + bpf_iput(new_inode);
> + }
> +out:
> + return FAN_FP_RET_SEND_TO_USERSPACE;
> +}
> +
> +SEC("struct_ops")
> +int BPF_PROG(bpf_fp_init, struct fanotify_fastpath_hook *hook, const char *args)
> +{
> + return 0;
> +}
> +
> +SEC("struct_ops")
> +void BPF_PROG(bpf_fp_free, struct fanotify_fastpath_hook *hook)
> +{
> +}
> +
> +SEC(".struct_ops.link")
> +struct fanotify_fastpath_ops bpf_fanotify_fastpath_ops = {
> + .fp_handler = (void *)bpf_fp_handler,
> + .fp_init = (void *)bpf_fp_init,
> + .fp_free = (void *)bpf_fp_free,
> + .name = "_tmp_test_sub_tree",
> +};
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.43.5
>
Powered by blists - more mailing lists