[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250428033617.3797686-2-roman.gushchin@linux.dev>
Date: Mon, 28 Apr 2025 03:36:06 +0000
From: Roman Gushchin <roman.gushchin@...ux.dev>
To: linux-kernel@...r.kernel.org
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Alexei Starovoitov <ast@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Michal Hocko <mhocko@...nel.org>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Suren Baghdasaryan <surenb@...gle.com>,
David Rientjes <rientjes@...gle.com>,
Josh Don <joshdon@...gle.com>,
Chuyi Zhou <zhouchuyi@...edance.com>,
cgroups@...r.kernel.org,
linux-mm@...ck.org,
bpf@...r.kernel.org,
Roman Gushchin <roman.gushchin@...ux.dev>
Subject: [PATCH rfc 01/12] mm: introduce a bpf hook for OOM handling
Introduce a bpf hook for implementing custom OOM handling policies.
The hook is the int bpf_handle_out_of_memory(struct oom_control *oc)
function, which is expected to return 1 if it was able to free some
memory and 0 otherwise. In the latter case it's guaranteed that
the in-kernel OOM killer will be invoked. Otherwise the kernel
also checks the bpf_memory_freed field of the oom_control structure,
which is expected to be set by kfuncs suitable for releasing memory.
It's a safety mechanism which prevents a bpf program from claiming
forward progress without actually releasing memory.
The hook program is sleepable to enable using iterators, e.g.
cgroup iterators.
The hook is executed just before the kernel victim task selection
algorithm, so all heuristics and sysctls like sysctl_panic_on_oom
and sysctl_oom_kill_allocating_task
are respected.
Signed-off-by: Roman Gushchin <roman.gushchin@...ux.dev>
---
include/linux/oom.h | 5 ++++
mm/oom_kill.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 73 insertions(+)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 1e0fc6931ce9..cc14aac9742c 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -51,6 +51,11 @@ struct oom_control {
/* Used to print the constraint info. */
enum oom_constraint constraint;
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* Used by the bpf oom implementation to mark the forward progress */
+ bool bpf_memory_freed;
+#endif
};
extern struct mutex oom_lock;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 25923cfec9c6..d00776b63c0a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -45,6 +45,7 @@
#include <linux/mmu_notifier.h>
#include <linux/cred.h>
#include <linux/nmi.h>
+#include <linux/bpf.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -1100,6 +1101,30 @@ int unregister_oom_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
+#ifdef CONFIG_BPF_SYSCALL
+int bpf_handle_out_of_memory(struct oom_control *oc);
+
+/*
+ * Give the bpf oom hook a chance to handle the OOM. Returns true only if
+ * bpf_handle_out_of_memory() returned non-zero AND oc->bpf_memory_freed
+ * is set afterwards; the hook's return value alone is not trusted.
+ */
+static bool bpf_handle_oom(struct oom_control *oc)
+{
+ if (WARN_ON_ONCE(oc->chosen))
+ oc->chosen = NULL;
+
+ oc->bpf_memory_freed = false; /* must be set anew during this invocation */
+
+ return bpf_handle_out_of_memory(oc) && oc->bpf_memory_freed;
+}
+
+#else
+/* Stub used without CONFIG_BPF_SYSCALL: there is no bpf hook, so report no progress. */
+static inline bool bpf_handle_oom(struct oom_control *oc)
+{
+	return false;
+}
+#endif
+
/**
* out_of_memory - kill the "best" process when we run out of memory
* @oc: pointer to struct oom_control
@@ -1161,6 +1186,13 @@ bool out_of_memory(struct oom_control *oc)
return true;
}
+ /*
+ * Let bpf handle the OOM first. If it was able to free up some memory,
+ * bail out. Otherwise fall back to the kernel OOM killer.
+ */
+ if (bpf_handle_oom(oc))
+ return true;
+
select_bad_process(oc);
/* Found nothing?!?! */
if (!oc->chosen) {
@@ -1264,3 +1296,39 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
return -ENOSYS;
#endif /* CONFIG_MMU */
}
+
+#ifdef CONFIG_BPF_SYSCALL
+
+__bpf_hook_start();
+
+/*
+ * Bpf hook to customize the oom handling policy. This weak, noinline
+ * default returns 0, which makes bpf_handle_oom() return false so that
+ * out_of_memory() goes on to select_bad_process().
+ */
+__weak noinline int bpf_handle_out_of_memory(struct oom_control *oc)
+{
+ return 0;
+}
+
+__bpf_hook_end();
+
+BTF_KFUNCS_START(bpf_oom_hooks) /* id set of the bpf oom hook functions */
+BTF_ID_FLAGS(func, bpf_handle_out_of_memory, KF_SLEEPABLE) /* flagged sleepable */
+BTF_KFUNCS_END(bpf_oom_hooks)
+
+static const struct btf_kfunc_id_set bpf_oom_hook_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_oom_hooks, /* registered by bpf_oom_init() */
+};
+/*
+ * Register the bpf oom hook id set via register_btf_fmodret_id_set().
+ * Runs as a late_initcall. Returns 0 on success or the registration
+ * error, which is also logged.
+ */
+static int __init bpf_oom_init(void)
+{
+	int err;
+
+	err = register_btf_fmodret_id_set(&bpf_oom_hook_set);
+	if (err)
+		/* Terminate the message with '\n' so the log record is complete. */
+		pr_warn("error while registering bpf oom hooks: %d\n", err);
+
+	return err;
+}
+late_initcall(bpf_oom_init);
+
+#endif
--
2.49.0.901.g37484f566f-goog
Powered by blists - more mailing lists