Date:	Wed, 15 Jul 2015 08:04:41 +0800
From:	Yuyang Du <yuyang.du@...el.com>
To:	mingo@...nel.org, peterz@...radead.org,
	linux-kernel@...r.kernel.org
Cc:	pjt@...gle.com, bsegall@...gle.com, morten.rasmussen@....com,
	vincent.guittot@...aro.org, dietmar.eggemann@....com,
	umgwanakikbuti@...il.com, len.brown@...el.com,
	rafael.j.wysocki@...el.com, arjan@...ux.intel.com,
	fengguang.wu@...el.com, Yuyang Du <yuyang.du@...el.com>
Subject: [PATCH v10 6/7] sched: Provide runnable_load_avg back to cfs_rq

The cfs_rq's load_avg is composed of runnable_load_avg and blocked_load_avg.
Before this series, sometimes runnable_load_avg was used and sometimes
load_avg was used. Completely replacing all uses of runnable_load_avg with
load_avg may be too big a leap, because the blocked_load_avg component could
result in an overrated load. Therefore, bring runnable_load_avg back.

The new cfs_rq runnable_load_avg is improved in that it is updated for all of
the runnable sched_entities at the same time, which solves the problem of one
sched_entity being updated while the others remain stale.
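
For illustration only (not part of the patch): a simplified, self-contained
sketch of the idea, with made-up toy types and a crude decay in place of the
kernel's decay_load()/__compute_runnable_contrib(). The point is that the
cfs_rq-wide runnable sums are decayed and accrued in lockstep with the
per-entity sums whenever a cfs_rq pointer is passed in, and left untouched
when it is NULL.

/* Toy sketch -- names and constants are illustrative, not kernel API. */
#include <stdint.h>
#include <stdio.h>

#define TOY_LOAD_AVG_MAX 47742	/* max value of the PELT geometric series */

struct toy_sched_avg {
	uint64_t load_sum;
	unsigned long load_avg;
};

struct toy_cfs_rq {
	uint64_t runnable_load_sum;
	unsigned long runnable_load_avg;
};

/* Crude stand-in for decay_load(): multiply by y per period, y^32 ~= 1/2. */
static uint64_t toy_decay(uint64_t sum, unsigned int periods)
{
	while (periods--)
		sum = (sum * 1002) >> 10;	/* y ~= 0.9785 in 10-bit fixed point */
	return sum;
}

/*
 * Accrue 'delta' us of runnable time at 'weight' for the entity and, if a
 * cfs_rq is passed, for the rq-wide runnable sums at the same time --
 * mirroring the cfs_rq terms added in __update_load_avg() by this patch.
 */
static void toy_update_load_avg(struct toy_sched_avg *sa,
				struct toy_cfs_rq *cfs_rq,
				unsigned long weight, uint64_t delta,
				unsigned int periods)
{
	sa->load_sum = toy_decay(sa->load_sum, periods);
	if (cfs_rq)
		cfs_rq->runnable_load_sum =
			toy_decay(cfs_rq->runnable_load_sum, periods);

	if (weight) {
		sa->load_sum += weight * delta;
		if (cfs_rq)
			cfs_rq->runnable_load_sum += weight * delta;
	}

	sa->load_avg = sa->load_sum / TOY_LOAD_AVG_MAX;
	if (cfs_rq)
		cfs_rq->runnable_load_avg =
			cfs_rq->runnable_load_sum / TOY_LOAD_AVG_MAX;
}

int main(void)
{
	struct toy_sched_avg se = { 0, 0 };
	struct toy_cfs_rq cfs_rq = { 0, 0 };

	/* Entity runnable on the rq: both the se and the rq sums move. */
	toy_update_load_avg(&se, &cfs_rq, 1024, 512, 0);
	/* Entity-only update (cfs_rq == NULL): the rq sums stay put. */
	toy_update_load_avg(&se, NULL, 1024, 512, 1);

	printf("se.load_avg=%lu cfs_rq.runnable_load_avg=%lu\n",
	       se.load_avg, cfs_rq.runnable_load_avg);
	return 0;
}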

Signed-off-by: Yuyang Du <yuyang.du@...el.com>
---
 kernel/sched/debug.c |  2 ++
 kernel/sched/fair.c  | 57 +++++++++++++++++++++++++++++++++++++++++-----------
 kernel/sched/sched.h |  2 ++
 3 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 56d83f3..bfb7bb2 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -205,6 +205,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #ifdef CONFIG_SMP
 	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
 			cfs_rq->avg.load_avg);
+	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
+			cfs_rq->runnable_load_avg);
 	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
 			cfs_rq->avg.util_avg);
 	SEQ_printf(m, "  .%-30s: %ld\n", "removed_load_avg",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 19f1199..a1bffe1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2545,9 +2545,9 @@ static u32 __compute_runnable_contrib(u64 n)
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int __update_load_avg(u64 now, int cpu,
-						struct sched_avg *sa,
-						unsigned long weight,
-						int running)
+					struct sched_avg *sa,
+					unsigned long weight,
+					int running, struct cfs_rq *cfs_rq)
 {
 	u64 delta, periods;
 	u32 contrib;
@@ -2587,8 +2587,11 @@ static __always_inline int __update_load_avg(u64 now, int cpu,
 		 * period and accrue it.
 		 */
 		delta_w = 1024 - delta_w;
-		if (weight)
+		if (weight) {
 			sa->load_sum += weight * delta_w;
+			if (cfs_rq)
+				cfs_rq->runnable_load_sum += weight * delta_w;
+		}
 		if (running)
 			sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT;
 
@@ -2599,19 +2602,28 @@ static __always_inline int __update_load_avg(u64 now, int cpu,
 		delta %= 1024;
 
 		sa->load_sum = decay_load(sa->load_sum, periods + 1);
+		if (cfs_rq)
+			cfs_rq->runnable_load_sum =
+				decay_load(cfs_rq->runnable_load_sum, periods + 1);
 		sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
 
 		/* Efficiently calculate \sum (1..n_period) 1024*y^i */
 		contrib = __compute_runnable_contrib(periods);
-		if (weight)
+		if (weight) {
 			sa->load_sum += weight * contrib;
+			if (cfs_rq)
+				cfs_rq->runnable_load_sum += weight * contrib;
+		}
 		if (running)
 			sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT;
 	}
 
 	/* Remainder of delta accrued against u_0` */
-	if (weight)
+	if (weight) {
 		sa->load_sum += weight * delta;
+		if (cfs_rq)
+			cfs_rq->runnable_load_sum += weight * delta;
+	}
 	if (running)
 		sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT;
 
@@ -2619,6 +2631,9 @@ static __always_inline int __update_load_avg(u64 now, int cpu,
 
 	if (decayed) {
 		sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+		if (cfs_rq)
+			cfs_rq->runnable_load_avg =
+				div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
 		sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX;
 	}
 
@@ -2666,7 +2681,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 	}
 
 	decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-		scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL);
+		scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
 
 #ifndef CONFIG_64BIT
 	smp_wmb();
@@ -2688,7 +2703,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
 	__update_load_avg(now, cpu, &se->avg,
-		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
 
 	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
 		update_tg_load_avg(cfs_rq, 0);
@@ -2708,11 +2723,15 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	}
 	else {
 		__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-			se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+			se->on_rq * scale_load_down(se->load.weight),
+			cfs_rq->curr == se, NULL);
 	}
 
 	decayed = update_cfs_rq_load_avg(now, cfs_rq);
 
+	cfs_rq->runnable_load_avg += sa->load_avg;
+	cfs_rq->runnable_load_sum += sa->load_sum;
+
 	if (migrated) {
 		cfs_rq->avg.load_avg += sa->load_avg;
 		cfs_rq->avg.load_sum += sa->load_sum;
@@ -2724,6 +2743,18 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		update_tg_load_avg(cfs_rq, 0);
 }
 
+/* Remove the runnable load generated by se from cfs_rq's runnable load average */
+static inline void
+dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	update_load_avg(se, 1);
+
+	cfs_rq->runnable_load_avg =
+		max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
+	cfs_rq->runnable_load_sum =
+		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+}
+
 /*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
@@ -2745,7 +2776,7 @@ void remove_entity_load_avg(struct sched_entity *se)
 	last_update_time = cfs_rq->avg.last_update_time;
 #endif
 
-	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0);
+	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
 	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
 	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
@@ -2775,6 +2806,8 @@ static int idle_balance(struct rq *this_rq);
 static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void
+dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
 static inline int idle_balance(struct rq *rq)
@@ -2982,7 +3015,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	update_load_avg(se, 1);
+	dequeue_entity_load_avg(cfs_rq, se);
 
 	update_stats_dequeue(cfs_rq, se);
 	if (flags & DEQUEUE_SLEEP) {
@@ -7899,7 +7932,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 #ifdef CONFIG_SMP
 	/* Catch up with the cfs_rq and remove our load when we leave */
 	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg,
-		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+		se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
 
 	cfs_rq->avg.load_avg =
 		max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f2b17ea..118f5ae 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -368,6 +368,8 @@ struct cfs_rq {
 	 * CFS load tracking
 	 */
 	struct sched_avg avg;
+	u64 runnable_load_sum;
+	unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	unsigned long tg_load_avg_contrib;
 #endif
-- 
2.1.4
