lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230810081319.65668-2-zhouchuyi@bytedance.com>
Date:   Thu, 10 Aug 2023 16:13:15 +0800
From:   Chuyi Zhou <zhouchuyi@...edance.com>
To:     hannes@...xchg.org, mhocko@...nel.org, roman.gushchin@...ux.dev,
        ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
        muchun.song@...ux.dev
Cc:     bpf@...r.kernel.org, linux-kernel@...r.kernel.org,
        wuyun.abel@...edance.com, robin.lu@...edance.com,
        Chuyi Zhou <zhouchuyi@...edance.com>,
        Michal Hocko <mhocko@...e.com>
Subject: [RFC PATCH v2 1/5] mm, oom: Introduce bpf_oom_evaluate_task

This patch adds a new hook bpf_oom_evaluate_task in oom_evaluate_task. It
takes oc and current iterating task as parameters and returns a result
indicating which one should be selected. We can use it to bypass the
current logic of oom_evaluate_task and implement customized OOM policies
in the attached BPF progams.

Suggested-by: Michal Hocko <mhocko@...e.com>
Signed-off-by: Chuyi Zhou <zhouchuyi@...edance.com>
---
 mm/oom_kill.c | 59 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 612b5597d3af..255c9ef1d808 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -18,6 +18,7 @@
  *  kernel subsystems and hints as to where to find out what things do.
  */
 
+#include <linux/bpf.h>
 #include <linux/oom.h>
 #include <linux/mm.h>
 #include <linux/err.h>
@@ -305,6 +306,27 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
 	return CONSTRAINT_NONE;
 }
 
+enum {
+	NO_BPF_POLICY,
+	BPF_EVAL_ABORT,
+	BPF_EVAL_NEXT,
+	BPF_EVAL_SELECT,
+};
+
+__weak noinline int bpf_oom_evaluate_task(struct task_struct *task, struct oom_control *oc)
+{
+	return NO_BPF_POLICY;
+}
+
+BTF_SET8_START(oom_bpf_fmodret_ids)
+BTF_ID_FLAGS(func, bpf_oom_evaluate_task)
+BTF_SET8_END(oom_bpf_fmodret_ids)
+
+static const struct btf_kfunc_id_set oom_bpf_fmodret_set = {
+	.owner = THIS_MODULE,
+	.set   = &oom_bpf_fmodret_ids,
+};
+
 static int oom_evaluate_task(struct task_struct *task, void *arg)
 {
 	struct oom_control *oc = arg;
@@ -317,6 +339,26 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
 	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(task, oc))
 		goto next;
 
+	/*
+	 * If task is allocating a lot of memory and has been marked to be
+	 * killed first if it triggers an oom, then select it.
+	 */
+	if (oom_task_origin(task)) {
+		points = LONG_MAX;
+		goto select;
+	}
+
+	switch (bpf_oom_evaluate_task(task, oc)) {
+	case BPF_EVAL_ABORT:
+		goto abort; /* abort search process */
+	case BPF_EVAL_NEXT:
+		goto next; /* ignore the task */
+	case BPF_EVAL_SELECT:
+		goto select; /* select the task */
+	default:
+		break; /* No BPF policy */
+	}
+
 	/*
 	 * This task already has access to memory reserves and is being killed.
 	 * Don't allow any other task to have access to the reserves unless
@@ -329,15 +371,6 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
 		goto abort;
 	}
 
-	/*
-	 * If task is allocating a lot of memory and has been marked to be
-	 * killed first if it triggers an oom, then select it.
-	 */
-	if (oom_task_origin(task)) {
-		points = LONG_MAX;
-		goto select;
-	}
-
 	points = oom_badness(task, oc->totalpages);
 	if (points == LONG_MIN || points < oc->chosen_points)
 		goto next;
@@ -732,10 +765,18 @@ static struct ctl_table vm_oom_kill_table[] = {
 
 static int __init oom_init(void)
 {
+	int err;
 	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
 #ifdef CONFIG_SYSCTL
 	register_sysctl_init("vm", vm_oom_kill_table);
 #endif
+
+#ifdef CONFIG_BPF_SYSCALL
+	err = register_btf_fmodret_id_set(&oom_bpf_fmodret_set);
+	if (err)
+		pr_warn("error while registering oom fmodret entrypoints: %d", err);
+#endif
+
 	return 0;
 }
 subsys_initcall(oom_init)
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ