Date:   Tue, 28 Mar 2017 18:44:59 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     Dietmar Eggemann <dietmar.eggemann@....com>,
        Ingo Molnar <mingo@...nel.org>,
        LKML <linux-kernel@...r.kernel.org>,
        Matt Fleming <matt@...eblueprint.co.uk>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Morten Rasmussen <morten.rasmussen@....com>,
        Juri Lelli <juri.lelli@....com>,
        Patrick Bellasi <patrick.bellasi@....com>
Subject: Re: [RFC PATCH 2/5] sched/events: Introduce cfs_rq load tracking
 trace event

On Tue, Mar 28, 2017 at 10:46:00AM -0400, Steven Rostedt wrote:
> On Tue, 28 Mar 2017 07:35:38 +0100
> Dietmar Eggemann <dietmar.eggemann@....com> wrote:
> 
> >  /* This part must be outside protection */
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 03adf9fb48b1..ac19ab6ced8f 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -2950,6 +2950,9 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
> >  		sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
> >  	}
> >  
> > +	if (cfs_rq)
> > +		trace_sched_load_cfs_rq(cfs_rq);
> > +
> 
> Please use TRACE_EVENT_CONDITION(), and test for cfs_rq not NULL.
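
For reference, TRACE_EVENT_CONDITION() folds such a check into the
tracepoint itself. A minimal sketch, omitting the usual trace-header
boilerplate and carrying one illustrative field rather than the real
event's payload:

TRACE_EVENT_CONDITION(sched_load_cfs_rq,

	TP_PROTO(struct cfs_rq *cfs_rq),

	TP_ARGS(cfs_rq),

	/* the event fires only when this condition holds */
	TP_CONDITION(cfs_rq != NULL),

	TP_STRUCT__entry(
		__field(int, cpu)
	),

	TP_fast_assign(
		__entry->cpu = cpu_of(rq_of(cfs_rq));
	),

	TP_printk("cpu=%d", __entry->cpu)
);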

I too suggested that; but then I looked at that code again, and we can
actually do this: cfs_rq can be constant-propagated and the if
resolved at build time.

It's not immediately obvious from the current code, but if we do
something like the below, it should be clearer.
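
A stripped-down sketch of the idea (simplified names, not the actual
kernel functions): because the callee is __always_inline, each
wrapper's constant cfs_rq argument is propagated into the body and
the if () is folded at compile time.

static __always_inline int
___update(struct sched_avg *sa, struct cfs_rq *cfs_rq)
{
	/* ... common PELT math on @sa ... */

	if (cfs_rq)	/* folded away when cfs_rq is constant NULL */
		trace_sched_load_cfs_rq(cfs_rq);

	return 0;
}

/* NULL variant: the branch and the trace call are compiled out. */
static int __update_se(struct sched_avg *sa)
{
	return ___update(sa, NULL);
}

/* cfs_rq variant: the trace call is retained. */
static int __update_cfs_rq(struct sched_avg *sa, struct cfs_rq *cfs_rq)
{
	return ___update(sa, cfs_rq);
}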

---
Subject: sched/fair: Explicitly generate __update_load_avg() instances
From: Peter Zijlstra <peterz@...radead.org>
Date: Tue Mar 28 11:08:20 CEST 2017

The __update_load_avg() function is __always_inline because it's used
with constant propagation to generate different variants of the code
without having to duplicate it (which would be prone to bugs).

Explicitly instantiate the 3 variants.

Note that most of this is called from rather hot paths, so reducing
branches is good.

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2849,7 +2849,7 @@ static u32 __compute_runnable_contrib(u6
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int
-__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
+___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 		  unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
 	u64 delta, scaled_delta, periods;
@@ -2953,6 +2953,26 @@ __update_load_avg(u64 now, int cpu, stru
 	return decayed;
 }
 
+static int
+__update_load_avg_blocked_se(u64 now, int cpu, struct sched_avg *sa)
+{
+	return ___update_load_avg(now, cpu, sa, 0, 0, NULL);
+}
+
+static int
+__update_load_avg_se(u64 now, int cpu, struct sched_avg *sa,
+		     unsigned long weight, int running)
+{
+	return ___update_load_avg(now, cpu, sa, weight, running, NULL);
+}
+
+static int
+__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
+		  unsigned long weight, int running, struct cfs_rq *cfs_rq)
+{
+	return ___update_load_avg(now, cpu, sa, weight, running, cfs_rq);
+}
+
 /*
  * Signed add and clamp on underflow.
  *
@@ -3014,6 +3034,9 @@ static inline void update_tg_load_avg(st
 void set_task_rq_fair(struct sched_entity *se,
 		      struct cfs_rq *prev, struct cfs_rq *next)
 {
+	u64 p_last_update_time;
+	u64 n_last_update_time;
+
 	if (!sched_feat(ATTACH_AGE_LOAD))
 		return;
 
@@ -3024,11 +3047,11 @@ void set_task_rq_fair(struct sched_entit
 	 * time. This will result in the wakee task is less decayed, but giving
 	 * the wakee more load sounds not bad.
 	 */
-	if (se->avg.last_update_time && prev) {
-		u64 p_last_update_time;
-		u64 n_last_update_time;
+	if (!(se->avg.last_update_time && prev))
+		return;
 
 #ifndef CONFIG_64BIT
+	{
 		u64 p_last_update_time_copy;
 		u64 n_last_update_time_copy;
 
@@ -3043,14 +3066,15 @@ void set_task_rq_fair(struct sched_entit
 
 		} while (p_last_update_time != p_last_update_time_copy ||
 			 n_last_update_time != n_last_update_time_copy);
+	}
 #else
-		p_last_update_time = prev->avg.last_update_time;
-		n_last_update_time = next->avg.last_update_time;
+	p_last_update_time = prev->avg.last_update_time;
+	n_last_update_time = next->avg.last_update_time;
 #endif
-		__update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
-				  &se->avg, 0, 0, NULL);
-		se->avg.last_update_time = n_last_update_time;
-	}
+	__update_load_avg_blocked_se(p_last_update_time,
+				     cpu_of(rq_of(prev)),
+				     &se->avg);
+	se->avg.last_update_time = n_last_update_time;
 }
 
 /* Take into account change of utilization of a child task group */
@@ -3329,9 +3353,9 @@ static inline void update_load_avg(struc
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
 	if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
-		__update_load_avg(now, cpu, &se->avg,
+		__update_load_avg_se(now, cpu, &se->avg,
 			  se->on_rq * scale_load_down(se->load.weight),
-			  cfs_rq->curr == se, NULL);
+			  cfs_rq->curr == se);
 	}
 
 	decayed  = update_cfs_rq_load_avg(now, cfs_rq, true);
@@ -3437,7 +3461,7 @@ void sync_entity_load_avg(struct sched_e
 	u64 last_update_time;
 
 	last_update_time = cfs_rq_last_update_time(cfs_rq);
-	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+	__update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg);
 }
 
 /*
