Message-ID: <20250602180544.3626909-4-zecheng@google.com>
Date: Mon,  2 Jun 2025 18:05:43 +0000
From: Zecheng Li <zecheng@...gle.com>
To: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>, 
	Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>, 
	Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>, 
	Valentin Schneider <vschneid@...hat.com>, Xu Liu <xliuprof@...gle.com>, 
	Blake Jones <blakejones@...gle.com>, Josh Don <joshdon@...gle.com>, 
	Madadi Vineeth Reddy <vineethr@...ux.ibm.com>, linux-kernel@...r.kernel.org, 
	Zecheng Li <zecheng@...gle.com>
Subject: [RFC PATCH v2 3/3] sched/fair: Reorder struct sched_entity

Group the read-mostly hot fields of struct sched_entity at the head of
the struct when `CONFIG_FAIR_GROUP_SCHED` is set. The fields added by
`CONFIG_FAIR_GROUP_SCHED` are related to CFS cgroup scheduling and were
previously placed far away from the hot fields `load`, `on_rq` and
`vruntime`; move them to the head of the struct to exploit locality.
Although `depth` is not as hot as the other fields, keep it in the
group to avoid breaking the #ifdef boundaries. Enforce cacheline
alignment of struct sched_entity so that the cache group works as
intended.

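For illustration only (not part of this patch): a minimal userspace
sketch of the grouping idea, with made-up struct and field names.
Zero-length marker arrays bound the hot group, and aligning the struct
to the cacheline size keeps that group from straddling a cacheline
boundary.

#include <stddef.h>

#define CACHE_LINE	64

struct example_entity {
	char			hot_begin[0];	/* start of the "hot" group */
	void			*parent;	/* read-mostly pointers... */
	unsigned long		runnable_weight;
	unsigned char		on_rq;
	unsigned long long	vruntime;	/* ...and hot scheduling state */
	char			hot_end[0];	/* end of the "hot" group */
	unsigned long long	deadline;	/* colder fields follow */
} __attribute__((aligned(CACHE_LINE)));
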
Also add a compile-time check, enabled when `CONFIG_FAIR_GROUP_SCHED`
is set, that verifies the placement of the hot fields.

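Again purely illustrative (the patch itself uses the kernel's
CACHELINE_ASSERT_GROUP_MEMBER() helper): continuing the sketch above,
such a check can be written with offsetof() and a static assertion
that a field's storage lies entirely between the group markers.

/* Assert that member 'm' of struct example_entity is in the hot group. */
#define ASSERT_IN_HOT_GROUP(m)						\
	_Static_assert(offsetof(struct example_entity, m) >=		\
		       offsetof(struct example_entity, hot_begin) &&	\
		       offsetof(struct example_entity, m) +		\
		       sizeof(((struct example_entity *)0)->m) <=	\
		       offsetof(struct example_entity, hot_end),	\
		       #m " is outside the hot cacheline group")

ASSERT_IN_HOT_GROUP(parent);
ASSERT_IN_HOT_GROUP(on_rq);
ASSERT_IN_HOT_GROUP(vruntime);
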
Signed-off-by: Zecheng Li <zecheng@...gle.com>
---
 include/linux/sched.h | 39 +++++++++++++++++++++------------------
 kernel/sched/core.c   | 20 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..b20b2d590cf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -567,40 +567,43 @@ struct sched_statistics {
 } ____cacheline_aligned;
 
 struct sched_entity {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* Group the read-mostly hot fields in sched_entity */
+	__cacheline_group_begin(hot);
+	struct sched_entity		*parent;
+	/* rq on which this entity is (to be) queued: */
+	struct cfs_rq			*cfs_rq;
+	/* rq "owned" by this entity/group: */
+	struct cfs_rq			*my_q;
+	/* cached value of my_q->h_nr_running */
+	unsigned long			runnable_weight;
+	int				depth;
+#endif
+	unsigned char			on_rq;
+	unsigned char			sched_delayed;
+	unsigned char			rel_deadline;
+	unsigned char			custom_slice;
 	/* For load-balancing: */
 	struct load_weight		load;
+	u64				vruntime;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	__cacheline_group_end(hot);
+#endif
 	struct rb_node			run_node;
 	u64				deadline;
 	u64				min_vruntime;
 	u64				min_slice;
 
 	struct list_head		group_node;
-	unsigned char			on_rq;
-	unsigned char			sched_delayed;
-	unsigned char			rel_deadline;
-	unsigned char			custom_slice;
-					/* hole */
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
-	u64				vruntime;
 	s64				vlag;
 	u64				slice;
 
 	u64				nr_migrations;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	int				depth;
-	struct sched_entity		*parent;
-	/* rq on which this entity is (to be) queued: */
-	struct cfs_rq			*cfs_rq;
-	/* rq "owned" by this entity/group: */
-	struct cfs_rq			*my_q;
-	/* cached value of my_q->h_nr_running */
-	unsigned long			runnable_weight;
-#endif
-
 #ifdef CONFIG_SMP
 	/*
 	 * Per entity load average tracking.
@@ -610,7 +613,7 @@ struct sched_entity {
 	 */
 	struct sched_avg		avg;
 #endif
-};
+} ____cacheline_aligned;
 
 struct sched_rt_entity {
 	struct list_head		run_list;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ba89cd4f2fac..dcc50df9e8ca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8525,6 +8525,7 @@ static struct kmem_cache *task_group_cache __ro_after_init;
 #endif
 
 static void __init cfs_rq_struct_check(void);
+static void __init sched_entity_struct_check(void);
 
 void __init sched_init(void)
 {
@@ -8543,6 +8544,7 @@ void __init sched_init(void)
 	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
 #endif
 	cfs_rq_struct_check();
+	sched_entity_struct_check();
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10805,3 +10807,21 @@ static void __init cfs_rq_struct_check(void)
 #endif
 #endif
 }
+
+static void __init sched_entity_struct_check(void)
+{
+	/*
+	 * The compile-time check is only enabled with CONFIG_FAIR_GROUP_SCHED.
+	 * We care about the placement of the seven hottest fields below.
+	 */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, parent);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, cfs_rq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, my_q);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot,
+				      runnable_weight);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, on_rq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, load);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, vruntime);
+#endif
+}
-- 
2.49.0

