lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210916162451.709260-3-guro@fb.com>
Date:   Thu, 16 Sep 2021 09:24:47 -0700
From:   Roman Gushchin <guro@...com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>
CC:     Mel Gorman <mgorman@...hsingularity.net>, <bpf@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>, Roman Gushchin <guro@...com>
Subject: [PATCH rfc 2/6] bpf: sched: add convenient helpers to identify sched entities

This patch adds 3 helpers useful for dealing with sched entities:
  u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se);
  u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se);
  long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid);

A sched entity is the basic structure used by the scheduler to represent
schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED
is enabled). It will be passed as an argument to many bpf hooks, so
scheduler bpf programs need a convenient way to deal with it.

bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are
useful to identify a sched entity in userspace terms (pid, tgid and
cgroup id). bpf_sched_entity_belongs_to_cgrp() allows checking whether
a sched entity belongs to the sub-tree of a cgroup. This makes it
possible to write cgroup-specific scheduler policies even without
enabling the cgroup cpu controller.

Signed-off-by: Roman Gushchin <guro@...com>
---
 include/uapi/linux/bpf.h       | 23 +++++++++++
 kernel/sched/bpf_sched.c       | 74 ++++++++++++++++++++++++++++++++++
 scripts/bpf_doc.py             |  2 +
 tools/include/uapi/linux/bpf.h | 23 +++++++++++
 4 files changed, 122 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6dfbebb8fc8f..199e4a92820d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4900,6 +4900,26 @@ union bpf_attr {
  *		**-EINVAL** if *flags* is not zero.
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se)
+ *	Description
+ *		Return task's encoded tgid and pid if the sched entity is a task.
+ *	Return
+ *		Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise.
+ *
+ * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se)
+ *	Description
+ *		Return cgroup id if the given sched entity is a cgroup.
+ *	Return
+ *		Cgroup id, if *se* is a cgroup. (u64)-1 otherwise.
+ *
+ * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid)
+ *	Description
+ *		Check whether the sched entity belongs to the cgroup
+ *		identified by *cgrpid* or to its sub-tree. It doesn't require
+ *		the cgroup CPU controller to be enabled.
+ *	Return
+ *		1 if the sched entity belongs to the cgroup or its sub-tree, 0 otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5079,6 +5099,9 @@ union bpf_attr {
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
+	FN(sched_entity_to_tgidpid),	\
+	FN(sched_entity_to_cgrpid),	\
+	FN(sched_entity_belongs_to_cgrp),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
index 2f05c186cfd0..ead691dc6e85 100644
--- a/kernel/sched/bpf_sched.c
+++ b/kernel/sched/bpf_sched.c
@@ -42,12 +42,86 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
 	return 0;
 }
 
+BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se)
+{
+	if (entity_is_task(se)) {
+		struct task_struct *task = task_of(se);
+
+		return (u64) task->tgid << 32 | task->pid;
+	} else {
+		return (u64) -1;
+	}
+}
+
+BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (!entity_is_task(se))
+		return cgroup_id(se->cfs_rq->tg->css.cgroup);
+#endif
+	return (u64) -1;
+}
+
+BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se,
+	   u64, cgrpid)
+{
+#ifdef CONFIG_CGROUPS
+	struct cgroup *cgrp;
+	int level;
+
+	if (entity_is_task(se))
+		cgrp = task_dfl_cgroup(task_of(se));
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	else
+		cgrp = se->cfs_rq->tg->css.cgroup;
+#endif
+
+	for (level = cgrp->level; level; level--)
+		if (cgrp->ancestor_ids[level] == cgrpid)
+			return 1;
+#endif
+	return 0;
+}
+
+BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity)
+
+static const struct bpf_func_proto bpf_sched_entity_to_tgidpid_proto = {
+	.func		= bpf_sched_entity_to_tgidpid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_sched_entity_ids[0],
+};
+
+static const struct bpf_func_proto bpf_sched_entity_to_cgrpid_proto = {
+	.func		= bpf_sched_entity_to_cgrpid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_sched_entity_ids[0],
+};
+
+static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = {
+	.func		= bpf_sched_entity_belongs_to_cgrp,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_sched_entity_ids[0],
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_trace_printk:
 		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_sched_entity_to_tgidpid:
+		return &bpf_sched_entity_to_tgidpid_proto;
+	case BPF_FUNC_sched_entity_to_cgrpid:
+		return &bpf_sched_entity_to_cgrpid_proto;
+	case BPF_FUNC_sched_entity_belongs_to_cgrp:
+		return &bpf_sched_entity_belongs_to_cgrp_proto;
 	default:
 		return NULL;
 	}
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 00ac7b79cddb..84019ba5b67b 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -548,6 +548,7 @@ class PrinterHelpers(Printer):
             'struct socket',
             'struct file',
             'struct bpf_timer',
+            'struct sched_entity',
     ]
     known_types = {
             '...',
@@ -596,6 +597,7 @@ class PrinterHelpers(Printer):
             'struct socket',
             'struct file',
             'struct bpf_timer',
+            'struct sched_entity',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6dfbebb8fc8f..199e4a92820d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4900,6 +4900,26 @@ union bpf_attr {
  *		**-EINVAL** if *flags* is not zero.
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se)
+ *	Description
+ *		Return task's encoded tgid and pid if the sched entity is a task.
+ *	Return
+ *		Tgid and pid encoded as tgid << 32 \| pid, if *se* is a task. (u64)-1 otherwise.
+ *
+ * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se)
+ *	Description
+ *		Return cgroup id if the given sched entity is a cgroup.
+ *	Return
+ *		Cgroup id, if *se* is a cgroup. (u64)-1 otherwise.
+ *
+ * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid)
+ *	Description
+ *		Check whether the sched entity belongs to the cgroup
+ *		identified by *cgrpid* or to its sub-tree. It doesn't require
+ *		the cgroup CPU controller to be enabled.
+ *	Return
+ *		1 if the sched entity belongs to the cgroup or its sub-tree, 0 otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5079,6 +5099,9 @@ union bpf_attr {
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
+	FN(sched_entity_to_tgidpid),	\
+	FN(sched_entity_to_cgrpid),	\
+	FN(sched_entity_belongs_to_cgrp),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ