[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251027232206.473085-11-roman.gushchin@linux.dev>
Date: Mon, 27 Oct 2025 16:22:04 -0700
From: Roman Gushchin <roman.gushchin@...ux.dev>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org,
Alexei Starovoitov <ast@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>,
Michal Hocko <mhocko@...nel.org>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Johannes Weiner <hannes@...xchg.org>,
Andrii Nakryiko <andrii@...nel.org>,
JP Kobryn <inwardvessel@...il.com>,
linux-mm@...ck.org,
cgroups@...r.kernel.org,
bpf@...r.kernel.org,
Martin KaFai Lau <martin.lau@...nel.org>,
Song Liu <song@...nel.org>,
Kumar Kartikeya Dwivedi <memxor@...il.com>,
Tejun Heo <tj@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>
Subject: [PATCH v2 21/23] sched: psi: implement bpf_psi_create_trigger() kfunc
Implement a new bpf_psi_create_trigger() BPF kfunc, which allows
creating new PSI triggers that are either attached to a cgroup or
system-wide.
Created triggers exist for as long as the struct ops is loaded and,
if they are attached to a cgroup, for as long as that cgroup exists.
Due to a limitation of 5 arguments, the resource type and the "full"
bit are squeezed into a single u32.
Signed-off-by: Roman Gushchin <roman.gushchin@...ux.dev>
---
include/linux/cgroup.h | 4 ++
include/linux/psi.h | 6 +++
kernel/sched/bpf_psi.c | 94 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 104 insertions(+)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 6ed477338b16..1a99da44999e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -707,6 +707,10 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
{}
+/*
+ * !CONFIG_CGROUPS stub: there are no cgroups to look up, so this always
+ * returns NULL regardless of @id.
+ */
+static inline struct cgroup *cgroup_get_from_id(u64 id)
+{
+ return NULL;
+}
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUPS
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 8178e998d94b..8ffe84cd8571 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -50,6 +50,12 @@ int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
void psi_cgroup_restart(struct psi_group *group);
+
+#else
+/*
+ * Fallback for the #else branch (presumably !CONFIG_CGROUPS - confirm
+ * against the enclosing #ifdef): @cgrp is ignored and the system-wide
+ * psi_system group is always returned.
+ */
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+ return &psi_system;
+}
#endif
#else /* CONFIG_PSI */
diff --git a/kernel/sched/bpf_psi.c b/kernel/sched/bpf_psi.c
index c383a20119a6..7974de56594f 100644
--- a/kernel/sched/bpf_psi.c
+++ b/kernel/sched/bpf_psi.c
@@ -8,6 +8,7 @@
#include <linux/bpf_psi.h>
#include <linux/cgroup-defs.h>
+struct bpf_struct_ops bpf_psi_bpf_ops;
static struct workqueue_struct *bpf_psi_wq;
static DEFINE_MUTEX(bpf_psi_lock);
@@ -186,6 +187,92 @@ static const struct bpf_verifier_ops bpf_psi_verifier_ops = {
.is_valid_access = bpf_psi_ops_is_valid_access,
};
+__bpf_kfunc_start_defs();
+
+/**
+ * bpf_psi_create_trigger - Create a PSI trigger
+ * @bpf_psi: bpf_psi struct to attach the trigger to
+ * @cgroup_id: cgroup ID to attach the trigger to; 0 for system-wide scope
+ * @resource: resource to monitor (PSI_MEM, PSI_IO, etc), optionally OR-ed
+ *            with BPF_PSI_FULL
+ * @threshold_us: threshold in us
+ * @window_us: window in us
+ *
+ * Creates a PSI trigger and attaches it to @bpf_psi. The trigger stays
+ * active until the bpf struct ops is unloaded or the corresponding cgroup
+ * is deleted.
+ *
+ * Resource's most significant bit (BPF_PSI_FULL) encodes whether "some"
+ * (bit clear) or "full" (bit set) PSI state should be tracked.
+ *
+ * Returns 0 on success and a negative error code on failure: -EINVAL for
+ * an unknown resource, the cgroup lookup error (-ENOENT if the lookup
+ * yields no cgroup at all), or the error reported by psi_trigger_create().
+ */
+__bpf_kfunc int bpf_psi_create_trigger(struct bpf_psi *bpf_psi,
+					u64 cgroup_id, u32 resource,
+					u32 threshold_us, u32 window_us)
+{
+	enum psi_res res = resource & ~BPF_PSI_FULL;
+	/*
+	 * Use a designated initializer so that every member which is not
+	 * relevant for PSI_BPF triggers is zeroed instead of carrying
+	 * stack garbage into psi_trigger_create().
+	 */
+	struct psi_trigger_params params = {
+		.type = PSI_BPF,
+		.bpf_psi = bpf_psi,
+		.res = res,
+		.full = !!(resource & BPF_PSI_FULL),
+		.threshold_us = threshold_us,
+		.window_us = window_us,
+	};
+	struct cgroup *cgroup __maybe_unused = NULL;
+	struct psi_group *group;
+	struct psi_trigger *t;
+	int ret = 0;
+
+	if (res >= NR_PSI_RESOURCES)
+		return -EINVAL;
+
+	if (IS_ENABLED(CONFIG_CGROUPS) && cgroup_id) {
+		cgroup = cgroup_get_from_id(cgroup_id);
+		/*
+		 * PTR_ERR(NULL) is 0, so a NULL result must be handled
+		 * separately or the caller would see "success" although
+		 * no trigger has been created.
+		 */
+		if (!cgroup)
+			return -ENOENT;
+		if (IS_ERR(cgroup))
+			return PTR_ERR(cgroup);
+
+		group = cgroup_psi(cgroup);
+	} else {
+		group = &psi_system;
+	}
+
+	params.privileged = capable(CAP_SYS_RESOURCE);
+
+	t = psi_trigger_create(group, &params);
+	if (IS_ERR(t))
+		ret = PTR_ERR(t);
+	else
+		t->cgroup_id = cgroup_id;
+
+	/* The reference was only needed to resolve the psi group. */
+#ifdef CONFIG_CGROUPS
+	if (cgroup)
+		cgroup_put(cgroup);
+#endif
+
+	return ret;
+}
+__bpf_kfunc_end_defs();
+
+/*
+ * BTF id set listing the kfuncs provided by bpf_psi. It is consulted by
+ * bpf_psi_kfunc_filter() and referenced from bpf_psi_kfunc_set.
+ */
+BTF_KFUNCS_START(bpf_psi_kfuncs)
+BTF_ID_FLAGS(func, bpf_psi_create_trigger, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_psi_kfuncs)
+
+/*
+ * Restrict the bpf_psi kfuncs to programs that belong to the bpf_psi
+ * struct ops. Returns 0 if @prog may call @kfunc_id, -EACCES otherwise.
+ */
+static int bpf_psi_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	/* Kfuncs outside of our set are not restricted by this filter. */
+	if (!btf_id_set8_contains(&bpf_psi_kfuncs, kfunc_id))
+		return 0;
+
+	/* Our kfuncs are callable from bpf_psi struct ops programs only. */
+	if (prog->aux->st_ops == &bpf_psi_bpf_ops)
+		return 0;
+
+	return -EACCES;
+}
+
+/*
+ * Kfunc id set descriptor tying bpf_psi_kfuncs to bpf_psi_kfunc_filter().
+ * It is registered for BPF_PROG_TYPE_STRUCT_OPS programs in
+ * bpf_psi_struct_ops_init().
+ */
+static const struct btf_kfunc_id_set bpf_psi_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_psi_kfuncs,
+ .filter = bpf_psi_kfunc_filter,
+};
+
static int bpf_psi_ops_reg(void *kdata, struct bpf_link *link)
{
struct bpf_psi_ops *ops = kdata;
@@ -287,6 +374,13 @@ static int __init bpf_psi_struct_ops_init(void)
if (!bpf_psi_wq)
return -ENOMEM;
+ err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
+ &bpf_psi_kfunc_set);
+ if (err) {
+ pr_warn("error while registering bpf psi kfuncs: %d", err);
+ goto err;
+ }
+
err = register_bpf_struct_ops(&bpf_psi_bpf_ops, bpf_psi_ops);
if (err) {
pr_warn("error while registering bpf psi struct ops: %d", err);
--
2.51.0
Powered by blists - more mailing lists