[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ae654476-5cc2-36ae-1047-eba196c9b38d@bytedance.com>
Date: Wed, 16 Aug 2023 20:31:03 +0800
From: Chuyi Zhou <zhouchuyi@...edance.com>
To: Alan Maguire <alan.maguire@...cle.com>, hannes@...xchg.org,
mhocko@...nel.org, roman.gushchin@...ux.dev, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org, muchun.song@...ux.dev
Cc: bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
wuyun.abel@...edance.com, robin.lu@...edance.com
Subject: Re: [RFC PATCH v2 4/5] bpf: Add a OOM policy test
Hello,
在 2023/8/16 19:53, Alan Maguire 写道:
> On 10/08/2023 09:13, Chuyi Zhou wrote:
>> This patch adds a test which implements a priority-based policy through
>> bpf_oom_evaluate_task.
>>
>> The BPF program, oom_policy.c, compares the cgroup priority of two tasks
>> and select the lower one. The userspace program test_oom_policy.c
>> maintains a priority map by using cgroup id as the keys and priority as
>> the values. We could protect certain cgroups from oom-killer by setting
>> higher priority.
>>
>> Signed-off-by: Chuyi Zhou <zhouchuyi@...edance.com>
>> ---
>> .../bpf/prog_tests/test_oom_policy.c | 140 ++++++++++++++++++
>> .../testing/selftests/bpf/progs/oom_policy.c | 104 +++++++++++++
>> 2 files changed, 244 insertions(+)
>> create mode 100644 tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
>> create mode 100644 tools/testing/selftests/bpf/progs/oom_policy.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
>> new file mode 100644
>> index 000000000000..bea61ff22603
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
>> @@ -0,0 +1,140 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +#define _GNU_SOURCE
>> +
>> +#include <stdio.h>
>> +#include <fcntl.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <signal.h>
>> +#include <sys/stat.h>
>> +#include <test_progs.h>
>> +#include <bpf/btf.h>
>> +#include <bpf/bpf.h>
>> +
>> +#include "cgroup_helpers.h"
>> +#include "oom_policy.skel.h"
>> +
>> +static int map_fd;
>> +static int cg_nr;
>> +struct {
>> + const char *path;
>> + int fd;
>> + unsigned long long id;
>> +} cgs[] = {
>> + { "/cg1" },
>> + { "/cg2" },
>> +};
>> +
>> +
>> +static struct oom_policy *open_load_oom_policy_skel(void)
>> +{
>> + struct oom_policy *skel;
>> + int err;
>> +
>> + skel = oom_policy__open();
>> + if (!ASSERT_OK_PTR(skel, "skel_open"))
>> + return NULL;
>> +
>> + err = oom_policy__load(skel);
>> + if (!ASSERT_OK(err, "skel_load"))
>> + goto cleanup;
>> +
>> + return skel;
>> +
>> +cleanup:
>> + oom_policy__destroy(skel);
>> + return NULL;
>> +}
>> +
>> +static void run_memory_consume(unsigned long long consume_size, int idx)
>> +{
>> + char *buf;
>> +
>> + join_parent_cgroup(cgs[idx].path);
>> + buf = malloc(consume_size);
>> + memset(buf, 0, consume_size);
>> + sleep(2);
>> + exit(0);
>> +}
>> +
>> +static int set_cgroup_prio(unsigned long long cg_id, int prio)
>> +{
>> + int err;
>> +
>> + err = bpf_map_update_elem(map_fd, &cg_id, &prio, BPF_ANY);
>> + ASSERT_EQ(err, 0, "update_map");
>> + return err;
>> +}
>> +
>> +static int prepare_cgroup_environment(void)
>> +{
>> + int err;
>> +
>> + err = setup_cgroup_environment();
>> + if (err)
>> + goto clean_cg_env;
>> + for (int i = 0; i < cg_nr; i++) {
>> + err = cgs[i].fd = create_and_get_cgroup(cgs[i].path);
>> + if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
>> + goto clean_cg_env;
>> + cgs[i].id = get_cgroup_id(cgs[i].path);
>> + }
>> + return 0;
>> +clean_cg_env:
>> + cleanup_cgroup_environment();
>> + return err;
>> +}
>> +
>> +void test_oom_policy(void)
>> +{
>> + struct oom_policy *skel;
>> + struct bpf_link *link;
>> + int err;
>> + int victim_pid;
>> + unsigned long long victim_cg_id;
>> +
>> + link = NULL;
>> + cg_nr = ARRAY_SIZE(cgs);
>> +
>> + skel = open_load_oom_policy_skel();
>> + err = oom_policy__attach(skel);
>> + if (!ASSERT_OK(err, "oom_policy__attach"))
>> + goto cleanup;
>> +
>> + map_fd = bpf_object__find_map_fd_by_name(skel->obj, "cg_map");
>> + if (!ASSERT_GE(map_fd, 0, "find map"))
>> + goto cleanup;
>> +
>> + err = prepare_cgroup_environment();
>> + if (!ASSERT_EQ(err, 0, "prepare cgroup env"))
>> + goto cleanup;
>> +
>> + write_cgroup_file("/", "memory.max", "10M");
>> +
>> + /*
>> + * Set higher priority to cg2 and lower to cg1, so we would select
>> + * task under cg1 as victim.(see oom_policy.c)
>> + */
>> + set_cgroup_prio(cgs[0].id, 10);
>> + set_cgroup_prio(cgs[1].id, 50);
>> +
>> + victim_cg_id = cgs[0].id;
>> + victim_pid = fork();
>> +
>> + if (victim_pid == 0)
>> + run_memory_consume(1024 * 1024 * 4, 0);
>> +
>> + if (fork() == 0)
>> + run_memory_consume(1024 * 1024 * 8, 1);
>> +
>> + while (wait(NULL) > 0)
>> + ;
>> +
>> + ASSERT_EQ(skel->bss->victim_pid, victim_pid, "victim_pid");
>> + ASSERT_EQ(skel->bss->victim_cg_id, victim_cg_id, "victim_cgid");
>> + ASSERT_EQ(skel->bss->failed_cnt, 1, "failed_cnt");
>> +cleanup:
>> + bpf_link__destroy(link);
>> + oom_policy__destroy(skel);
>> + cleanup_cgroup_environment();
>> +}
>> diff --git a/tools/testing/selftests/bpf/progs/oom_policy.c b/tools/testing/selftests/bpf/progs/oom_policy.c
>> new file mode 100644
>> index 000000000000..fc9efc93914e
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/oom_policy.c
>> @@ -0,0 +1,104 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +#include <vmlinux.h>
>> +#include <bpf/bpf_tracing.h>
>> +#include <bpf/bpf_helpers.h>
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +struct {
>> + __uint(type, BPF_MAP_TYPE_HASH);
>> + __type(key, int);
>> + __type(value, int);
>> + __uint(max_entries, 24);
>> +} cg_map SEC(".maps");
>> +
>> +unsigned int victim_pid;
>> +u64 victim_cg_id;
>> +int failed_cnt;
>> +
>> +#define EOPNOTSUPP 95
>> +
>> +enum {
>> + NO_BPF_POLICY,
>> + BPF_EVAL_ABORT,
>> + BPF_EVAL_NEXT,
>> + BPF_EVAL_SELECT,
>> +};
>
> When I built a kernel using this series and tried building the
> associated test for that kernel I saw:
>
> progs/oom_policy.c:22:2: error: redefinition of enumerator 'NO_BPF_POLICY'
> NO_BPF_POLICY,
> ^
> /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75894:2:
> note: previous definition is here
> NO_BPF_POLICY = 0,
> ^
> progs/oom_policy.c:23:2: error: redefinition of enumerator 'BPF_EVAL_ABORT'
> BPF_EVAL_ABORT,
> ^
> /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75895:2:
> note: previous definition is here
> BPF_EVAL_ABORT = 1,
> ^
> progs/oom_policy.c:24:2: error: redefinition of enumerator 'BPF_EVAL_NEXT'
> BPF_EVAL_NEXT,
> ^
> /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75896:2:
> note: previous definition is here
> BPF_EVAL_NEXT = 2,
> ^
> progs/oom_policy.c: CLNG-BPF [test_maps] tailcall_bpf2bpf4.bpf.o
> 25:2: error: redefinition of enumerator 'BPF_EVAL_SELECT'
> BPF_EVAL_SELECT,
> ^
> /home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75897:2:
> note: previous definition is here
> BPF_EVAL_SELECT = 3,
> ^
> 4 errors generated.
>
>
> So you shouldn't need the enum definition since it already makes it into
> vmlinux.h.
> OK. It seems my vmlinux.h doesn't contain these enum...
> I also ran into test failures when I removed the above (and compilation
> succeeded):
>
>
> test_oom_policy:PASS:prepare cgroup env 0 nsec
> (cgroup_helpers.c:130: errno: No such file or directory) Opening
> /mnt/cgroup-test-work-dir23054//memory.max
> set_cgroup_prio:PASS:update_map 0 nsec
> set_cgroup_prio:PASS:update_map 0 nsec
> test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 !=
> expected 23058
> test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 !=
> expected 68
> test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 !=
> expected 1
> #154 oom_policy:FAIL
> Summary: 1/0 PASSED, 0 SKIPPED, 1 FAILED
>
> So it seems that because my system was using the cgroupv1 memory
> controller, it could not be used for v2 unless I rebooted with
>
> systemd.unified_cgroup_hierarchy=1
>
> ...on the boot commandline. It would be good to note any such
> requirements for this test in the selftests/bpf/README.rst.
> Might also be worth adding
>
> write_cgroup_file("", "cgroup.subtree_control", "+memory");
>
> ...to ensure the memory controller is enabled for the root cgroup.
>
> At that point the test still failed:
>
> set_cgroup_prio:PASS:update_map 0 nsec
> test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 !=
> expected 12649
> test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 !=
> expected 9583
> test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 !=
> expected 1
> #154 oom_policy:FAIL
> Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
> Successfully unloaded bpf_testmod.ko.
>
>
It seems that OOM is not invoked in your environment(you can check it in
demsg). If the memcg OOM is invoked by the test, we would record the
*victim_pid* and *victim_cgid* and they would not be zero. I guess the
reason is memory_control is not enabled in cgroup
"/mnt/cgroup-test-work-dir23054/", because I see the error message:
(cgroup_helpers.c:130: errno: No such file or directory) Opening
> /mnt/cgroup-test-work-dir23054//memory.max
Thanks for your review and test!
> Are there other implicit assumptions about configuration that cause this
> test to fail perhaps?
>
> Alan
Powered by blists - more mailing lists