lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5bb59039-4f3b-49b6-d440-3210d7a92754@oracle.com>
Date:   Wed, 16 Aug 2023 12:53:02 +0100
From:   Alan Maguire <alan.maguire@...cle.com>
To:     Chuyi Zhou <zhouchuyi@...edance.com>, hannes@...xchg.org,
        mhocko@...nel.org, roman.gushchin@...ux.dev, ast@...nel.org,
        daniel@...earbox.net, andrii@...nel.org, muchun.song@...ux.dev
Cc:     bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
        wuyun.abel@...edance.com, robin.lu@...edance.com
Subject: Re: [RFC PATCH v2 4/5] bpf: Add a OOM policy test

On 10/08/2023 09:13, Chuyi Zhou wrote:
> This patch adds a test which implements a priority-based policy through
> bpf_oom_evaluate_task.
> 
> The BPF program, oom_policy.c, compares the cgroup priority of two tasks
> and select the lower one. The userspace program test_oom_policy.c
> maintains a priority map by using cgroup id as the keys and priority as
> the values. We could protect certain cgroups from oom-killer by setting
> higher priority.
> 
> Signed-off-by: Chuyi Zhou <zhouchuyi@...edance.com>
> ---
>  .../bpf/prog_tests/test_oom_policy.c          | 140 ++++++++++++++++++
>  .../testing/selftests/bpf/progs/oom_policy.c  | 104 +++++++++++++
>  2 files changed, 244 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
>  create mode 100644 tools/testing/selftests/bpf/progs/oom_policy.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
> new file mode 100644
> index 000000000000..bea61ff22603
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/test_oom_policy.c
> @@ -0,0 +1,140 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#define _GNU_SOURCE
> +
> +#include <stdio.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <signal.h>
> +#include <sys/stat.h>
> +#include <test_progs.h>
> +#include <bpf/btf.h>
> +#include <bpf/bpf.h>
> +
> +#include "cgroup_helpers.h"
> +#include "oom_policy.skel.h"
> +
> +static int map_fd;
> +static int cg_nr;
> +struct {
> +	const char *path;
> +	int fd;
> +	unsigned long long id;
> +} cgs[] = {
> +	{ "/cg1" },
> +	{ "/cg2" },
> +};
> +
> +
> +static struct oom_policy *open_load_oom_policy_skel(void)
> +{
> +	struct oom_policy *skel;
> +	int err;
> +
> +	skel = oom_policy__open();
> +	if (!ASSERT_OK_PTR(skel, "skel_open"))
> +		return NULL;
> +
> +	err = oom_policy__load(skel);
> +	if (!ASSERT_OK(err, "skel_load"))
> +		goto cleanup;
> +
> +	return skel;
> +
> +cleanup:
> +	oom_policy__destroy(skel);
> +	return NULL;
> +}
> +
> +static void run_memory_consume(unsigned long long consume_size, int idx)
> +{
> +	char *buf;
> +
> +	join_parent_cgroup(cgs[idx].path);
> +	buf = malloc(consume_size);
> +	memset(buf, 0, consume_size);
> +	sleep(2);
> +	exit(0);
> +}
> +
> +static int set_cgroup_prio(unsigned long long cg_id, int prio)
> +{
> +	int err;
> +
> +	err = bpf_map_update_elem(map_fd, &cg_id, &prio, BPF_ANY);
> +	ASSERT_EQ(err, 0, "update_map");
> +	return err;
> +}
> +
> +static int prepare_cgroup_environment(void)
> +{
> +	int err;
> +
> +	err = setup_cgroup_environment();
> +	if (err)
> +		goto clean_cg_env;
> +	for (int i = 0; i < cg_nr; i++) {
> +		err = cgs[i].fd = create_and_get_cgroup(cgs[i].path);
> +		if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
> +			goto clean_cg_env;
> +		cgs[i].id = get_cgroup_id(cgs[i].path);
> +	}
> +	return 0;
> +clean_cg_env:
> +	cleanup_cgroup_environment();
> +	return err;
> +}
> +
> +void test_oom_policy(void)
> +{
> +	struct oom_policy *skel;
> +	struct bpf_link *link;
> +	int err;
> +	int victim_pid;
> +	unsigned long long victim_cg_id;
> +
> +	link = NULL;
> +	cg_nr = ARRAY_SIZE(cgs);
> +
> +	skel = open_load_oom_policy_skel();
> +	err = oom_policy__attach(skel);
> +	if (!ASSERT_OK(err, "oom_policy__attach"))
> +		goto cleanup;
> +
> +	map_fd = bpf_object__find_map_fd_by_name(skel->obj, "cg_map");
> +	if (!ASSERT_GE(map_fd, 0, "find map"))
> +		goto cleanup;
> +
> +	err = prepare_cgroup_environment();
> +	if (!ASSERT_EQ(err, 0, "prepare cgroup env"))
> +		goto cleanup;
> +
> +	write_cgroup_file("/", "memory.max", "10M");
> +
> +	/*
> +	 * Set higher priority to cg2 and lower to cg1, so we would select
> +	 * task under cg1 as victim.(see oom_policy.c)
> +	 */
> +	set_cgroup_prio(cgs[0].id, 10);
> +	set_cgroup_prio(cgs[1].id, 50);
> +
> +	victim_cg_id = cgs[0].id;
> +	victim_pid = fork();
> +
> +	if (victim_pid == 0)
> +		run_memory_consume(1024 * 1024 * 4, 0);
> +
> +	if (fork() == 0)
> +		run_memory_consume(1024 * 1024 * 8, 1);
> +
> +	while (wait(NULL) > 0)
> +		;
> +
> +	ASSERT_EQ(skel->bss->victim_pid, victim_pid, "victim_pid");
> +	ASSERT_EQ(skel->bss->victim_cg_id, victim_cg_id, "victim_cgid");
> +	ASSERT_EQ(skel->bss->failed_cnt, 1, "failed_cnt");
> +cleanup:
> +	bpf_link__destroy(link);
> +	oom_policy__destroy(skel);
> +	cleanup_cgroup_environment();
> +}
> diff --git a/tools/testing/selftests/bpf/progs/oom_policy.c b/tools/testing/selftests/bpf/progs/oom_policy.c
> new file mode 100644
> index 000000000000..fc9efc93914e
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/oom_policy.c
> @@ -0,0 +1,104 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <vmlinux.h>
> +#include <bpf/bpf_tracing.h>
> +#include <bpf/bpf_helpers.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_HASH);
> +	__type(key, int);
> +	__type(value, int);
> +	__uint(max_entries, 24);
> +} cg_map SEC(".maps");
> +
> +unsigned int victim_pid;
> +u64 victim_cg_id;
> +int failed_cnt;
> +
> +#define	EOPNOTSUPP	95
> +
> +enum {
> +	NO_BPF_POLICY,
> +	BPF_EVAL_ABORT,
> +	BPF_EVAL_NEXT,
> +	BPF_EVAL_SELECT,
> +};

When I built a kernel using this series and tried building the
associated test for that kernel I saw:

progs/oom_policy.c:22:2: error: redefinition of enumerator 'NO_BPF_POLICY'
        NO_BPF_POLICY,
        ^
/home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75894:2:
note: previous definition is here
        NO_BPF_POLICY = 0,
        ^
progs/oom_policy.c:23:2: error: redefinition of enumerator 'BPF_EVAL_ABORT'
        BPF_EVAL_ABORT,
        ^
/home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75895:2:
note: previous definition is here
        BPF_EVAL_ABORT = 1,
        ^
progs/oom_policy.c:24:2: error: redefinition of enumerator 'BPF_EVAL_NEXT'
        BPF_EVAL_NEXT,
        ^
/home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75896:2:
note: previous definition is here
        BPF_EVAL_NEXT = 2,
        ^
progs/oom_policy.c:  CLNG-BPF [test_maps] tailcall_bpf2bpf4.bpf.o
25:2: error: redefinition of enumerator 'BPF_EVAL_SELECT'
        BPF_EVAL_SELECT,
        ^
/home/opc/src/bpf-next/tools/testing/selftests/bpf/tools/include/vmlinux.h:75897:2:
note: previous definition is here
        BPF_EVAL_SELECT = 3,
        ^
4 errors generated.


So you shouldn't need the enum definition since it already makes it into
vmlinux.h.

I also ran into test failures when I removed the above (and compilation
succeeded):


test_oom_policy:PASS:prepare cgroup env 0 nsec
(cgroup_helpers.c:130: errno: No such file or directory) Opening
/mnt/cgroup-test-work-dir23054//memory.max
set_cgroup_prio:PASS:update_map 0 nsec
set_cgroup_prio:PASS:update_map 0 nsec
test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 !=
expected 23058
test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 !=
expected 68
test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 !=
expected 1
#154     oom_policy:FAIL
Summary: 1/0 PASSED, 0 SKIPPED, 1 FAILED

So it seems that because my system was using the cgroupv1 memory
controller, it could not be used for v2 unless I rebooted with

systemd.unified_cgroup_hierarchy=1

...on the boot commandline. It would be good to note any such
requirements for this test in the selftests/bpf/README.rst.
Might also be worth adding

write_cgroup_file("", "cgroup.subtree_control", "+memory");

...to ensure the memory controller is enabled for the root cgroup.

At that point the test still failed:

set_cgroup_prio:PASS:update_map 0 nsec
test_oom_policy:FAIL:victim_pid unexpected victim_pid: actual 0 !=
expected 12649
test_oom_policy:FAIL:victim_cgid unexpected victim_cgid: actual 0 !=
expected 9583
test_oom_policy:FAIL:failed_cnt unexpected failed_cnt: actual 0 !=
expected 1
#154     oom_policy:FAIL
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Successfully unloaded bpf_testmod.ko.


Are there other implicit assumptions about configuration that cause this
test to fail perhaps?

Alan

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ