Message-ID: <20250402212904.8866-3-zecheng@google.com>
Date: Wed, 2 Apr 2025 21:29:02 +0000
From: Zecheng Li <zecheng@...gle.com>
To: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>, Xu Liu <xliuprof@...gle.com>,
Blake Jones <blakejones@...gle.com>, Josh Don <joshdon@...gle.com>, linux-kernel@...r.kernel.org,
Zecheng Li <zecheng@...gle.com>
Subject: [RFC PATCH 2/2] sched/fair: Reorder struct sched_entity
Group the read-mostly hot fields of struct sched_entity into the first
cacheline when `CONFIG_FAIR_GROUP_SCHED` is set. This moves the fields
added under `CONFIG_FAIR_GROUP_SCHED` into the first cacheline, since
they are frequently accessed and mostly read. Currently, these fields
related to cfs cgroup scheduling are placed on a separate cacheline
from the hot fields `load`, `on_rq` and `vruntime`. Although `depth` is
not as hot as the other fields, it is kept in the group to avoid
splitting the #ifdef block.
Also add a compile-time check, enabled when `CONFIG_FAIR_GROUP_SCHED`
is set, to verify the placement of the hot fields.
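
For illustration only (not part of this patch), below is a minimal
userspace sketch of the same pattern. The struct, its field names and
the ASSERT_IN_HOT_GROUP() macro are invented stand-ins for the kernel's
__cacheline_group_begin_aligned()/__cacheline_group_end_aligned() and
CACHELINE_ASSERT_GROUP_MEMBER() helpers from <linux/cache.h>:

#include <stddef.h>
#include <stdio.h>

#define CACHE_LINE 64

struct demo_entity {
	/* Zero-length marker array, aligned to a cache line, in the
	 * spirit of __cacheline_group_begin_aligned(hot). */
	char hot_begin[0] __attribute__((aligned(CACHE_LINE)));
	void *parent;
	void *cfs_rq;
	void *my_q;
	unsigned long runnable_weight;
	int depth;
	unsigned char on_rq;
	unsigned long load;		/* stand-in for struct load_weight */
	unsigned long long vruntime;
	/* End marker; its alignment pads the group out to a cache-line
	 * boundary, like __cacheline_group_end_aligned(hot). */
	char hot_end[0] __attribute__((aligned(CACHE_LINE)));
	/* colder fields would follow here */
	unsigned long long exec_start;
};

/* Compile-time check in the spirit of CACHELINE_ASSERT_GROUP_MEMBER():
 * the member must lie entirely between the two group markers. */
#define ASSERT_IN_HOT_GROUP(member)					\
	_Static_assert(offsetof(struct demo_entity, member) >=		\
		       offsetof(struct demo_entity, hot_begin) &&	\
		       offsetof(struct demo_entity, member) +		\
		       sizeof(((struct demo_entity *)0)->member) <=	\
		       offsetof(struct demo_entity, hot_end),		\
		       #member " is outside the hot group")

ASSERT_IN_HOT_GROUP(parent);
ASSERT_IN_HOT_GROUP(vruntime);

int main(void)
{
	printf("hot group: bytes %zu..%zu\n",
	       offsetof(struct demo_entity, hot_begin),
	       offsetof(struct demo_entity, hot_end));
	return 0;
}

As long as the members between the markers total at most 64 bytes, the
group fits in one cache line, and the _Static_assert()s fail the build
if a later edit pushes a checked field out of the group.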
Signed-off-by: Zecheng Li <zecheng@...gle.com>
---
include/linux/sched.h | 37 ++++++++++++++++++++-----------------
kernel/sched/core.c | 20 ++++++++++++++++++++
2 files changed, 40 insertions(+), 17 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c0..e9f58254999d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -545,40 +545,43 @@ struct sched_statistics {
} ____cacheline_aligned;
struct sched_entity {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* Group the read-mostly hot fields of sched_entity in one cache line */
+ __cacheline_group_begin_aligned(hot);
+ struct sched_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct cfs_rq *cfs_rq;
+ /* rq "owned" by this entity/group: */
+ struct cfs_rq *my_q;
+ /* cached value of my_q->h_nr_running */
+ unsigned long runnable_weight;
+ int depth;
+#endif
+ unsigned char on_rq;
+ unsigned char sched_delayed;
+ unsigned char rel_deadline;
+ unsigned char custom_slice;
/* For load-balancing: */
struct load_weight load;
+ u64 vruntime;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ __cacheline_group_end_aligned(hot);
+#endif
struct rb_node run_node;
u64 deadline;
u64 min_vruntime;
u64 min_slice;
struct list_head group_node;
- unsigned char on_rq;
- unsigned char sched_delayed;
- unsigned char rel_deadline;
- unsigned char custom_slice;
- /* hole */
u64 exec_start;
u64 sum_exec_runtime;
u64 prev_sum_exec_runtime;
- u64 vruntime;
s64 vlag;
u64 slice;
u64 nr_migrations;
-#ifdef CONFIG_FAIR_GROUP_SCHED
- int depth;
- struct sched_entity *parent;
- /* rq on which this entity is (to be) queued: */
- struct cfs_rq *cfs_rq;
- /* rq "owned" by this entity/group: */
- struct cfs_rq *my_q;
- /* cached value of my_q->h_nr_running */
- unsigned long runnable_weight;
-#endif
-
#ifdef CONFIG_SMP
/*
* Per entity load average tracking.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84ee289d98d7..58bcd7d55eca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8474,6 +8474,7 @@ static struct kmem_cache *task_group_cache __ro_after_init;
#endif
static void __init cfs_rq_struct_check(void);
+static void __init sched_entity_struct_check(void);
void __init sched_init(void)
{
@@ -8492,6 +8493,7 @@ void __init sched_init(void)
BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
#endif
cfs_rq_struct_check();
+ sched_entity_struct_check();
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10755,3 +10757,21 @@ static void __init cfs_rq_struct_check(void)
#endif
#endif
}
+
+static void __init sched_entity_struct_check(void)
+{
+ /*
+ * The compile-time check is only enabled with CONFIG_FAIR_GROUP_SCHED.
+ * It verifies the placement of the seven hot fields asserted below.
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, parent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, cfs_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, my_q);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot,
+ runnable_weight);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, on_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, load);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, vruntime);
+#endif
+}
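
As a sanity check, the resulting layout can also be inspected with
pahole (from the dwarves package) on a vmlinux built with debug info;
the exact output depends on the config and compiler:

	pahole -C sched_entity vmlinux

With CONFIG_FAIR_GROUP_SCHED=y, the members of the hot group should
show up at offsets within the first 64-byte cacheline.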
--
2.49.0