lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250920005931.2753828-25-tj@kernel.org>
Date: Fri, 19 Sep 2025 14:58:47 -1000
From: Tejun Heo <tj@...nel.org>
To: void@...ifault.com,
	arighi@...dia.com,
	multics69@...il.com
Cc: linux-kernel@...r.kernel.org,
	sched-ext@...ts.linux.dev,
	memxor@...il.com,
	bpf@...r.kernel.org,
	Tejun Heo <tj@...nel.org>
Subject: [PATCH 24/46] HACK_NOT_FOR_UPSTREAM: BPF: Implement prog grouping hack

Hopefully, we can have something better instead.

NOT_SIGNED_OFF
---
 include/linux/bpf.h        |  5 +++++
 include/linux/sched.h      |  2 ++
 kernel/bpf/syscall.c       | 23 +++++++++++++++++++++++
 kernel/sched/ext.c         | 36 ++++++++++++++++++++++++++++++++++++
 tools/sched_ext/scx_qmap.c | 13 +++++++++++++
 5 files changed, 79 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc700925b802..5101ae3ba2b6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1581,6 +1581,11 @@ struct bpf_stream_stage {
 
 struct bpf_prog_aux {
 	atomic64_t refcnt;
+
+	/* XXX - See kernel/sched/ext.c::scx_sub_enable() */
+	u64 priv_user;
+	void *priv;
+
 	u32 used_map_cnt;
 	u32 used_btf_cnt;
 	u32 max_ctx_offset;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b272382673d..576aed48beb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1596,6 +1596,8 @@ struct task_struct {
 	struct bpf_local_storage __rcu	*bpf_storage;
 	/* Used for BPF run context */
 	struct bpf_run_ctx		*bpf_ctx;
+	/* XXX - See kernel/sched/ext.c::scx_sub_enable() */
+	u64				bpf_prog_aux_priv;
 #endif
 	/* Used by BPF for per-TASK xdp storage */
 	struct bpf_net_context		*bpf_net_context;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fbfa8532c39..e85dbe7fe5ce 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2761,6 +2761,27 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 	}
 }
 
+static int prog_aux_priv_param_set(const char *input, const struct kernel_param *kp)
+{
+	return kstrtoull(input, 0, &current->bpf_prog_aux_priv);
+}
+
+static int prog_aux_priv_param_get(char *buf, const struct kernel_param *kp)
+{
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", current->bpf_prog_aux_priv);
+}
+
+static const struct kernel_param_ops prog_aux_priv_param_ops = {
+	.set    = prog_aux_priv_param_set,
+	.get    = prog_aux_priv_param_get,
+};
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "bpf."
+module_param_cb(prog_aux_priv, &prog_aux_priv_param_ops, NULL, 0664);
+MODULE_PARM_DESC("prog_aux_priv",
+		 "Set prog->aux->priv to this value for all BPF programs loaded by %current");
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
 
@@ -2898,6 +2919,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 
 	prog->expected_attach_type = attr->expected_attach_type;
 	prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
+	/* XXX - See kernel/sched/ext.c::scx_sub_enable() */
+	prog->aux->priv_user = current->bpf_prog_aux_priv;
 	prog->aux->attach_btf = attach_btf;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
 	prog->aux->dst_prog = dst_prog;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 5eb1d6919595..a0251442b8ac 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4116,6 +4116,24 @@ static void scx_sub_disable(struct scx_sched *sch)
 
 	if (sch->ops.exit)
 		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, sch->exit_info);
+
+	/*
+	 * XXX - NULL prog->aux->priv is interpreted as scx_root, so use an
+	 * ERR_PTR value to mark the associated progs dead. Note that this is
+	 * racy as e.g. a tracepoint program associated with a scheduler which
+	 * hasn't finished scx_sub_enable() yet may end up affecting scx_root
+	 * inadvertently. Plug the hole when this hack is replaced with a proper
+	 * BPF construct.
+	 */
+	u32 prog_id = 0;
+	struct bpf_prog *prog;
+	while ((prog = bpf_prog_get_curr_or_next(&prog_id))) {
+		if (prog->aux->priv == sch)
+			RCU_INIT_POINTER(prog->aux->priv, ERR_PTR(-ENODEV));
+		bpf_prog_put(prog);
+		prog_id++;
+	}
+
 	kobject_del(&sch->kobj);
 }
 #else	/* CONFIG_EXT_SUB_SCHED */
@@ -5148,6 +5166,24 @@ static int scx_sub_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		goto err_disable;
 	}
 
+	/*
+	 * XXX - We want all BPF programs loaded together with this scheduler
+	 * instance to point to this scheduler instance. BPF currently doesn't
+	 * have such a feature, so work around it with a hack. The loading
+	 * userspace thread sets %current->bpf_prog_aux_priv to the associated
+	 * cgroup ID, which gets transferred to prog->aux->priv_user in
+	 * bpf_prog_load(). Here, we can find all progs that have the matching
+	 * cgroup ID and set their prog->aux->priv to $sch.
+	 */
+	u32 prog_id = 0;
+	struct bpf_prog *prog;
+	while ((prog = bpf_prog_get_curr_or_next(&prog_id))) {
+		if (prog->aux->priv_user == cgroup_id(cgrp))
+			rcu_assign_pointer(prog->aux->priv, sch);
+		bpf_prog_put(prog);
+		prog_id++;
+	}
+
 	if (sch->ops.init) {
 		ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, init, NULL);
 		if (ret) {
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index 5d762d10f4db..cefc439c9e4a 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -99,12 +99,25 @@ int main(int argc, char **argv)
 			break;
 		case 'c': {
 			struct stat st;
+			int fd, len;
+			char buf[19];
 			if (stat(optarg, &st) < 0) {
 				perror("stat");
 				return 1;
 			}
 			skel->struct_ops.qmap_ops->sub_cgroup_id = st.st_ino;
 			skel->rodata->sub_cgroup_id = st.st_ino;
+			fd = open("/sys/module/bpf/parameters/prog_aux_priv", O_RDWR);
+			if (fd < 0) {
+				perror("open(\"/sys/module/bpf/parameters/prog_aux_priv\")");
+				return 1;
+			}
+			len = snprintf(buf, sizeof(buf), "0x%lx", st.st_ino);
+			if (write(fd, buf, len) != len) {
+				perror("write(\"/sys/module/bpf/parameters/prog_aux_priv\")");
+				return 1;
+			}
+			close(fd);
 			break;
 		}
 		case 'd':
-- 
2.51.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ