Message-ID: <20241122152856.3533625-1-neelx@suse.com>
Date: Fri, 22 Nov 2024 16:28:55 +0100
From: Daniel Vacek <neelx@...e.com>
To: Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>
Cc: Daniel Vacek <neelx@...e.com>,
	linux-kernel@...r.kernel.org
Subject: [PATCH] sched/fair: properly serialize the cfs_rq h_load calculation

Make sure the given cfs_rq's h_load is always correctly updated. This
prevents a race in which multiple CPUs concurrently walk and update the
h_load_next pointers of the same cfs_rq hierarchy. Serialize the update
with a per-CPU raw spinlock and re-check last_h_load_update once the
lock is held.
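
For illustration only (not part of the change itself): the
serialization boils down to a per-CPU lock plus a re-check of the
timestamp once the lock is held, so only one updater per tick does the
work. Below is a minimal userspace sketch of that pattern, with
pthreads and time() standing in for the per-CPU raw spinlock and
jiffies; names such as update_h_load() and NR_CPUS are made up for the
sketch.

  /* Userspace analogue only; not kernel code. */
  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdio.h>
  #include <time.h>

  #define NR_CPUS 4

  static pthread_mutex_t h_load_lock[NR_CPUS];
  static _Atomic time_t last_update[NR_CPUS];
  static unsigned long h_load[NR_CPUS];

  static void update_h_load(int cpu, unsigned long load)
  {
          time_t now = time(NULL);

          /* Unlocked fast path: someone already updated this tick. */
          if (last_update[cpu] == now)
                  return;

          pthread_mutex_lock(&h_load_lock[cpu]);

          /* Re-check under the lock; only one updater wins per tick. */
          now = time(NULL);
          if (last_update[cpu] != now) {
                  h_load[cpu] = load;     /* stands in for the real work */
                  last_update[cpu] = now;
          }

          pthread_mutex_unlock(&h_load_lock[cpu]);
  }

  int main(void)
  {
          for (int i = 0; i < NR_CPUS; i++)
                  pthread_mutex_init(&h_load_lock[i], NULL);

          update_h_load(0, 42);
          printf("h_load[0] = %lu\n", h_load[0]);
          return 0;
  }

The kernel side differs in that it takes the lock of the cfs_rq's CPU
and walks the sched_entity hierarchy under it, but the double check of
last_h_load_update against the current tick is the same idea.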

Signed-off-by: Daniel Vacek <neelx@...e.com>
---
 kernel/sched/fair.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d16c8545c71..50794ba0db75 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9786,6 +9786,8 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
 	return decayed;
 }
 
+static DEFINE_PER_CPU(raw_spinlock_t, h_load_lock);
+
 /*
  * Compute the hierarchical load factor for cfs_rq and all its ascendants.
  * This needs to be done in a top-down fashion because the load of a child
@@ -9793,18 +9795,26 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
  */
 static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 {
-	struct rq *rq = rq_of(cfs_rq);
-	struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
+	int cpu = cpu_of(rq_of(cfs_rq));
+	struct sched_entity *se = cfs_rq->tg->se[cpu];
+	raw_spinlock_t *lock;
 	unsigned long now = jiffies;
 	unsigned long load;
 
 	if (cfs_rq->last_h_load_update == now)
 		return;
 
-	WRITE_ONCE(cfs_rq->h_load_next, NULL);
+	/* Protects cfs_rq->h_load_next and cfs_rq->last_h_load_update */
+	raw_spin_lock(lock = &per_cpu(h_load_lock, cpu));
+
+	now = jiffies;
+	if (cfs_rq->last_h_load_update == now)
+		goto unlock;
+
+	cfs_rq->h_load_next = NULL;
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-		WRITE_ONCE(cfs_rq->h_load_next, se);
+		cfs_rq->h_load_next = se;
 		if (cfs_rq->last_h_load_update == now)
 			break;
 	}
@@ -9814,7 +9824,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 		cfs_rq->last_h_load_update = now;
 	}
 
-	while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
+	while ((se = cfs_rq->h_load_next) != NULL) {
 		load = cfs_rq->h_load;
 		load = div64_ul(load * se->avg.load_avg,
 			cfs_rq_load_avg(cfs_rq) + 1);
@@ -9822,6 +9832,8 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 		cfs_rq->h_load = load;
 		cfs_rq->last_h_load_update = now;
 	}
+unlock:
+	raw_spin_unlock(lock);
 }
 
 static unsigned long task_h_load(struct task_struct *p)
@@ -13665,6 +13677,9 @@ __init void init_sched_fair_class(void)
 		zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
 					GFP_KERNEL, cpu_to_node(i));
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		raw_spin_lock_init(&per_cpu(h_load_lock, i));
+#endif
 #ifdef CONFIG_CFS_BANDWIDTH
 		INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));
 		INIT_LIST_HEAD(&cpu_rq(i)->cfsb_csd_list);
-- 
2.45.2

