Message-ID: <20171212123520.GA1972@e108498-lin.cambridge.arm.com>
Date: Tue, 12 Dec 2017 12:35:20 +0000
From: Quentin Perret <quentin.perret@....com>
To: Vincent Guittot <vincent.guittot@...aro.org>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	"rjw@...ysocki.net" <rjw@...ysocki.net>,
	Juri Lelli <juri.lelli@....com>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Morten Rasmussen <Morten.Rasmussen@....com>,
	viresh kumar <viresh.kumar@...aro.org>
Subject: Re: [PATCH v3 1/3] sched/pelt: Move pelt related code in a dedicated file

On Tuesday 12 Dec 2017 at 12:21:39 (+0100), Vincent Guittot wrote:
> Hi Quentin,
>
> On 11 December 2017 at 15:08, Quentin Perret <quentin.perret@....com> wrote:
> > Hi Vincent,
> >
> > Although I agree that moving the PELT code into a dedicated file is
> > probably the cleanest way to achieve what you want, I was wondering if
> > you were able to measure any overhead due to moving the __update_load_avg_*()
> > functions to a different translation unit? This is introducing function calls
> > in latency sensitive paths (wakeup for ex) so I'm just wondering what's
> > the impact in practice.
>
> To answer your question, I haven't seen any performance difference
> with perf bench sched pipe on a hikey board with the patchset.
> Then, where do you think this will explicitly introduce a function
> call?

For example, without the patch, __update_load_avg_blocked_se() was static
and the compiler had the opportunity to inline it where appropriate. With
the patch, the function is now in a different translation unit, so I think
an actual function call is always needed. I'm not saying it's a big deal,
I was just wondering if that might have a noticeable impact. The only
alternative I see is to put the rt rq load update functions in fair.c, but
that doesn't quite feel like the right thing to do anyway ...

> It's not obvious to me that the patch that moves the pelt code
> will generate such changes. size vmlinux gives me the exact same
> results with and without the patch that moves pelt code.
> size vmlinux of latest tip/sched/core sha1 a555e9d86ee3
> $ size vmlinux
>    text	   data	    bss	    dec	    hex	filename
> 10677259	5931388	 402104	17010751	103903f	vmlinux_orig
>
> size vmlinux with patch 1 rebased on latest tip/sched/core
> $ size vmlinux
>    text	   data	    bss	    dec	    hex	filename
> 10677259	5931388	 402104	17010751	103903f	vmlinux_patch_rt
>
> >
> > Thanks,
> > Quentin
> >
> > On Wednesday 22 Nov 2017 at 15:35:53 (+0100), Vincent Guittot wrote:
> >> We want to track rt_rq's utilization as a part of the estimation of the
> >> whole rq's utilization. This is necessary because rt tasks can steal
> >> utilization to cfs tasks and make them lighter than they are.
> >> As we want to use the same load tracking mecanism for both and prevent
> >> useless dependency between cfs and rt code, pelt code is moved in a
> >> dedicated file.
> >>
> >> Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
> >> ---
> >>  kernel/sched/Makefile |   2 +-
> >>  kernel/sched/fair.c   | 308 +-------------------------------------------------
> >>  kernel/sched/pelt.c   | 308 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  kernel/sched/pelt.h   |  17 +++
> >>  kernel/sched/sched.h  |  20 ++++
> >>  5 files changed, 347 insertions(+), 308 deletions(-)
> >>  create mode 100644 kernel/sched/pelt.c
> >>  create mode 100644 kernel/sched/pelt.h
> >>
> >> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> >> index e2f9d4f..5a6d1c1 100644
> >> --- a/kernel/sched/Makefile
> >> +++ b/kernel/sched/Makefile
> >> @@ -19,7 +19,7 @@ endif
> >>  obj-y += core.o loadavg.o clock.o cputime.o
> >>  obj-y += idle_task.o fair.o rt.o deadline.o
> >>  obj-y += wait.o wait_bit.o swait.o completion.o idle.o
> >> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
> >> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
> >>  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
> >>  obj-$(CONFIG_SCHEDSTATS) += stats.o
> >>  obj-$(CONFIG_SCHED_DEBUG) += debug.o
> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> >> index 0989676..b88550e 100644
> >> --- a/kernel/sched/fair.c
> >> +++ b/kernel/sched/fair.c
> >> @@ -270,9 +270,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
> >>  	return cfs_rq->rq;
> >>  }
> >>
> >> -/* An entity is a task if it doesn't "own" a runqueue */
> >> -#define entity_is_task(se)	(!se->my_q)
> >> -
> >>  static inline struct task_struct *task_of(struct sched_entity *se)
> >>  {
> >>  	SCHED_WARN_ON(!entity_is_task(se));
> >> @@ -434,7 +431,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
> >>  	return container_of(cfs_rq, struct rq, cfs);
> >>  }
> >>
> >> -#define entity_is_task(se)	1
> >>
> >>  #define for_each_sched_entity(se) \
> >>  		for (; se; se = NULL)
> >> @@ -707,7 +703,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
> >>  }
> >>
> >>  #ifdef CONFIG_SMP
> >> -
> >> +#include "pelt.h"
> >>  #include "sched-pelt.h"
> >>
> >>  static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
> >> @@ -2723,19 +2719,6 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
> >>  } while (0)
> >>
> >>  #ifdef CONFIG_SMP
> >> -/*
> >> - * XXX we want to get rid of these helpers and use the full load resolution.
> >> - */
> >> -static inline long se_weight(struct sched_entity *se)
> >> -{
> >> -	return scale_load_down(se->load.weight);
> >> -}
> >> -
> >> -static inline long se_runnable(struct sched_entity *se)
> >> -{
> >> -	return scale_load_down(se->runnable_weight);
> >> -}
> >> -
> >>  static inline void
> >>  enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
> >>  {
> >> @@ -3038,289 +3021,6 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
> >>  }
> >>
> >>  #ifdef CONFIG_SMP
> >> -/*
> >> - * Approximate:
> >> - *   val * y^n,     where y^32 ~= 0.5 (~1 scheduling period)
> >> - */
> >> -static u64 decay_load(u64 val, u64 n)
> >> -{
> >> -	unsigned int local_n;
> >> -
> >> -	if (unlikely(n > LOAD_AVG_PERIOD * 63))
> >> -		return 0;
> >> -
> >> -	/* after bounds checking we can collapse to 32-bit */
> >> -	local_n = n;
> >> -
> >> -	/*
> >> -	 * As y^PERIOD = 1/2, we can combine
> >> -	 *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
> >> -	 * With a look-up table which covers y^n (n<PERIOD)
> >> -	 *
> >> -	 * To achieve constant time decay_load.
> >> -	 */
> >> -	if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
> >> -		val >>= local_n / LOAD_AVG_PERIOD;
> >> -		local_n %= LOAD_AVG_PERIOD;
> >> -	}
> >> -
> >> -	val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
> >> -	return val;
> >> -}
> >> -
> >> -static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
> >> -{
> >> -	u32 c1, c2, c3 = d3; /* y^0 == 1 */
> >> -
> >> -	/*
> >> -	 * c1 = d1 y^p
> >> -	 */
> >> -	c1 = decay_load((u64)d1, periods);
> >> -
> >> -	/*
> >> -	 *            p-1
> >> -	 * c2 = 1024 \Sum y^n
> >> -	 *            n=1
> >> -	 *
> >> -	 *              inf        inf
> >> -	 *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
> >> -	 *              n=0        n=p
> >> -	 */
> >> -	c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
> >> -
> >> -	return c1 + c2 + c3;
> >> -}
> >> -
> >> -#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
> >> -
> >> -/*
> >> - * Accumulate the three separate parts of the sum; d1 the remainder
> >> - * of the last (incomplete) period, d2 the span of full periods and d3
> >> - * the remainder of the (incomplete) current period.
> >> - *
> >> - *           d1          d2           d3
> >> - *           ^           ^            ^
> >> - *           |           |            |
> >> - *         |<->|<----------------->|<--->|
> >> - * ... |---x---|------| ... |------|-----x (now)
> >> - *
> >> - *                           p-1
> >> - * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
> >> - *                           n=1
> >> - *
> >> - *    = u y^p +					(Step 1)
> >> - *
> >> - *                     p-1
> >> - *      d1 y^p + 1024 \Sum y^n + d3 y^0		(Step 2)
> >> - *                     n=1
> >> - */
> >> -static __always_inline u32
> >> -accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
> >> -	       unsigned long load, unsigned long runnable, int running)
> >> -{
> >> -	unsigned long scale_freq, scale_cpu;
> >> -	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
> >> -	u64 periods;
> >> -
> >> -	scale_freq = arch_scale_freq_capacity(NULL, cpu);
> >> -	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
> >> -
> >> -	delta += sa->period_contrib;
> >> -	periods = delta / 1024; /* A period is 1024us (~1ms) */
> >> -
> >> -	/*
> >> -	 * Step 1: decay old *_sum if we crossed period boundaries.
> >> -	 */
> >> -	if (periods) {
> >> -		sa->load_sum = decay_load(sa->load_sum, periods);
> >> -		sa->runnable_load_sum =
> >> -			decay_load(sa->runnable_load_sum, periods);
> >> -		sa->util_sum = decay_load((u64)(sa->util_sum), periods);
> >> -
> >> -		/*
> >> -		 * Step 2
> >> -		 */
> >> -		delta %= 1024;
> >> -		contrib = __accumulate_pelt_segments(periods,
> >> -				1024 - sa->period_contrib, delta);
> >> -	}
> >> -	sa->period_contrib = delta;
> >> -
> >> -	contrib = cap_scale(contrib, scale_freq);
> >> -	if (load)
> >> -		sa->load_sum += load * contrib;
> >> -	if (runnable)
> >> -		sa->runnable_load_sum += runnable * contrib;
> >> -	if (running)
> >> -		sa->util_sum += contrib * scale_cpu;
> >> -
> >> -	return periods;
> >> -}
> >> -
> >> -/*
> >> - * We can represent the historical contribution to runnable average as the
> >> - * coefficients of a geometric series.  To do this we sub-divide our runnable
> >> - * history into segments of approximately 1ms (1024us); label the segment that
> >> - * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
> >> - *
> >> - * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
> >> - *       p0            p1           p2
> >> - *      (now)       (~1ms ago)  (~2ms ago)
> >> - *
> >> - * Let u_i denote the fraction of p_i that the entity was runnable.
> >> - *
> >> - * We then designate the fractions u_i as our co-efficients, yielding the
> >> - * following representation of historical load:
> >> - *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
> >> - *
> >> - * We choose y based on the with of a reasonably scheduling period, fixing:
> >> - *   y^32 = 0.5
> >> - *
> >> - * This means that the contribution to load ~32ms ago (u_32) will be weighted
> >> - * approximately half as much as the contribution to load within the last ms
> >> - * (u_0).
> >> - *
> >> - * When a period "rolls over" and we have new u_0`, multiplying the previous
> >> - * sum again by y is sufficient to update:
> >> - *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
> >> - *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
> >> - */
> >> -static __always_inline int
> >> -___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
> >> -		  unsigned long load, unsigned long runnable, int running)
> >> -{
> >> -	u64 delta;
> >> -
> >> -	delta = now - sa->last_update_time;
> >> -	/*
> >> -	 * This should only happen when time goes backwards, which it
> >> -	 * unfortunately does during sched clock init when we swap over to TSC.
> >> -	 */
> >> -	if ((s64)delta < 0) {
> >> -		sa->last_update_time = now;
> >> -		return 0;
> >> -	}
> >> -
> >> -	/*
> >> -	 * Use 1024ns as the unit of measurement since it's a reasonable
> >> -	 * approximation of 1us and fast to compute.
> >> -	 */
> >> -	delta >>= 10;
> >> -	if (!delta)
> >> -		return 0;
> >> -
> >> -	sa->last_update_time += delta << 10;
> >> -
> >> -	/*
> >> -	 * running is a subset of runnable (weight) so running can't be set if
> >> -	 * runnable is clear. But there are some corner cases where the current
> >> -	 * se has been already dequeued but cfs_rq->curr still points to it.
> >> -	 * This means that weight will be 0 but not running for a sched_entity
> >> -	 * but also for a cfs_rq if the latter becomes idle. As an example,
> >> -	 * this happens during idle_balance() which calls
> >> -	 * update_blocked_averages()
> >> -	 */
> >> -	if (!load)
> >> -		runnable = running = 0;
> >> -
> >> -	/*
> >> -	 * Now we know we crossed measurement unit boundaries. The *_avg
> >> -	 * accrues by two steps:
> >> -	 *
> >> -	 * Step 1: accumulate *_sum since last_update_time. If we haven't
> >> -	 * crossed period boundaries, finish.
> >> -	 */
> >> -	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
> >> -		return 0;
> >> -
> >> -	return 1;
> >> -}
> >> -
> >> -static __always_inline void
> >> -___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
> >> -{
> >> -	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
> >> -
> >> -	/*
> >> -	 * Step 2: update *_avg.
> >> -	 */
> >> -	sa->load_avg = div_u64(load * sa->load_sum, divider);
> >> -	sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
> >> -	sa->util_avg = sa->util_sum / divider;
> >> -}
> >> -
> >> -/*
> >> - * sched_entity:
> >> - *
> >> - *   task:
> >> - *     se_runnable() == se_weight()
> >> - *
> >> - *   group: [ see update_cfs_group() ]
> >> - *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
> >> - *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
> >> - *
> >> - *   load_sum := runnable_sum
> >> - *   load_avg = se_weight(se) * runnable_avg
> >> - *
> >> - *   runnable_load_sum := runnable_sum
> >> - *   runnable_load_avg = se_runnable(se) * runnable_avg
> >> - *
> >> - *   XXX collapse load_sum and runnable_load_sum
> >> - *
> >> - * cfq_rs:
> >> - *
> >> - *   load_sum = \Sum se_weight(se) * se->avg.load_sum
> >> - *   load_avg = \Sum se->avg.load_avg
> >> - *
> >> - *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
> >> - *   runnable_load_avg = \Sum se->avg.runable_load_avg
> >> - */
> >> -
> >> -static int
> >> -__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
> >> -{
> >> -	if (entity_is_task(se))
> >> -		se->runnable_weight = se->load.weight;
> >> -
> >> -	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
> >> -		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
> >> -		return 1;
> >> -	}
> >> -
> >> -	return 0;
> >> -}
> >> -
> >> -static int
> >> -__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
> >> -{
> >> -	if (entity_is_task(se))
> >> -		se->runnable_weight = se->load.weight;
> >> -
> >> -	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
> >> -				cfs_rq->curr == se)) {
> >> -
> >> -		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
> >> -		return 1;
> >> -	}
> >> -
> >> -	return 0;
> >> -}
> >> -
> >> -static int
> >> -__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
> >> -{
> >> -	if (___update_load_sum(now, cpu, &cfs_rq->avg,
> >> -			scale_load_down(cfs_rq->load.weight),
> >> -			scale_load_down(cfs_rq->runnable_weight),
> >> -			cfs_rq->curr != NULL)) {
> >> -
> >> -		___update_load_avg(&cfs_rq->avg, 1, 1);
> >> -		return 1;
> >> -	}
> >> -
> >> -	return 0;
> >> -}
> >> -
> >>  #ifdef CONFIG_FAIR_GROUP_SCHED
> >>  /**
> >>   * update_tg_load_avg - update the tg's load avg
> >> @@ -3831,12 +3531,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf);
> >>
> >>  #else /* CONFIG_SMP */
> >>
> >> -static inline int
> >> -update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
> >> -{
> >> -	return 0;
> >> -}
> >> -
> >>  #define UPDATE_TG	0x0
> >>  #define SKIP_AGE_LOAD	0x0
> >>  #define DO_ATTACH	0x0
> >> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> >> new file mode 100644
> >> index 0000000..da6d84f
> >> --- /dev/null
> >> +++ b/kernel/sched/pelt.c
> >> @@ -0,0 +1,308 @@
> >> +/*
> >> + * Per Entity Load Tracking
> >> + *
> >> + * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@...hat.com>
> >> + *
> >> + *  Interactivity improvements by Mike Galbraith
> >> + *  (C) 2007 Mike Galbraith <efault@....de>
> >> + *
> >> + *  Various enhancements by Dmitry Adamushko.
> >> + *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@...il.com>
> >> + *
> >> + *  Group scheduling enhancements by Srivatsa Vaddagiri
> >> + *  Copyright IBM Corporation, 2007
> >> + *  Author: Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>
> >> + *
> >> + *  Scaled math optimizations by Thomas Gleixner
> >> + *  Copyright (C) 2007, Thomas Gleixner <tglx@...utronix.de>
> >> + *
> >> + *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
> >> + *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
> >> + *
> >> + *  Move PELT related code from fair.c into this pelt.c file
> >> + *  Author: Vincent Guittot <vincent.guittot@...aro.org>
> >> + */
> >> +
> >> +#include <linux/sched.h>
> >> +#include "sched.h"
> >> +#include "sched-pelt.h"
> >> +
> >> +/*
> >> + * Approximate:
> >> + *   val * y^n,     where y^32 ~= 0.5 (~1 scheduling period)
> >> + */
> >> +static u64 decay_load(u64 val, u64 n)
> >> +{
> >> +	unsigned int local_n;
> >> +
> >> +	if (unlikely(n > LOAD_AVG_PERIOD * 63))
> >> +		return 0;
> >> +
> >> +	/* after bounds checking we can collapse to 32-bit */
> >> +	local_n = n;
> >> +
> >> +	/*
> >> +	 * As y^PERIOD = 1/2, we can combine
> >> +	 *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
> >> +	 * With a look-up table which covers y^n (n<PERIOD)
> >> +	 *
> >> +	 * To achieve constant time decay_load.
> >> +	 */
> >> +	if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
> >> +		val >>= local_n / LOAD_AVG_PERIOD;
> >> +		local_n %= LOAD_AVG_PERIOD;
> >> +	}
> >> +
> >> +	val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
> >> +	return val;
> >> +}
> >> +
> >> +static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
> >> +{
> >> +	u32 c1, c2, c3 = d3; /* y^0 == 1 */
> >> +
> >> +	/*
> >> +	 * c1 = d1 y^p
> >> +	 */
> >> +	c1 = decay_load((u64)d1, periods);
> >> +
> >> +	/*
> >> +	 *            p-1
> >> +	 * c2 = 1024 \Sum y^n
> >> +	 *            n=1
> >> +	 *
> >> +	 *              inf        inf
> >> +	 *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
> >> +	 *              n=0        n=p
> >> +	 */
> >> +	c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
> >> +
> >> +	return c1 + c2 + c3;
> >> +}
> >> +
> >> +#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
> >> +
> >> +/*
> >> + * Accumulate the three separate parts of the sum; d1 the remainder
> >> + * of the last (incomplete) period, d2 the span of full periods and d3
> >> + * the remainder of the (incomplete) current period.
> >> + *
> >> + *           d1          d2           d3
> >> + *           ^           ^            ^
> >> + *           |           |            |
> >> + *         |<->|<----------------->|<--->|
> >> + * ... |---x---|------| ... |------|-----x (now)
> >> + *
> >> + *                           p-1
> >> + * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
> >> + *                           n=1
> >> + *
> >> + *    = u y^p +					(Step 1)
> >> + *
> >> + *                     p-1
> >> + *      d1 y^p + 1024 \Sum y^n + d3 y^0		(Step 2)
> >> + *                     n=1
> >> + */
> >> +static __always_inline u32
> >> +accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
> >> +	       unsigned long load, unsigned long runnable, int running)
> >> +{
> >> +	unsigned long scale_freq, scale_cpu;
> >> +	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
> >> +	u64 periods;
> >> +
> >> +	scale_freq = arch_scale_freq_capacity(NULL, cpu);
> >> +	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
> >> +
> >> +	delta += sa->period_contrib;
> >> +	periods = delta / 1024; /* A period is 1024us (~1ms) */
> >> +
> >> +	/*
> >> +	 * Step 1: decay old *_sum if we crossed period boundaries.
> >> +	 */
> >> +	if (periods) {
> >> +		sa->load_sum = decay_load(sa->load_sum, periods);
> >> +		sa->runnable_load_sum =
> >> +			decay_load(sa->runnable_load_sum, periods);
> >> +		sa->util_sum = decay_load((u64)(sa->util_sum), periods);
> >> +
> >> +		/*
> >> +		 * Step 2
> >> +		 */
> >> +		delta %= 1024;
> >> +		contrib = __accumulate_pelt_segments(periods,
> >> +				1024 - sa->period_contrib, delta);
> >> +	}
> >> +	sa->period_contrib = delta;
> >> +
> >> +	contrib = cap_scale(contrib, scale_freq);
> >> +	if (load)
> >> +		sa->load_sum += load * contrib;
> >> +	if (runnable)
> >> +		sa->runnable_load_sum += runnable * contrib;
> >> +	if (running)
> >> +		sa->util_sum += contrib * scale_cpu;
> >> +
> >> +	return periods;
> >> +}
> >> +
> >> +/*
> >> + * We can represent the historical contribution to runnable average as the
> >> + * coefficients of a geometric series.  To do this we sub-divide our runnable
> >> + * history into segments of approximately 1ms (1024us); label the segment that
> >> + * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
> >> + *
> >> + * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
> >> + *       p0            p1           p2
> >> + *      (now)       (~1ms ago)  (~2ms ago)
> >> + *
> >> + * Let u_i denote the fraction of p_i that the entity was runnable.
> >> + *
> >> + * We then designate the fractions u_i as our co-efficients, yielding the
> >> + * following representation of historical load:
> >> + *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
> >> + *
> >> + * We choose y based on the with of a reasonably scheduling period, fixing:
> >> + *   y^32 = 0.5
> >> + *
> >> + * This means that the contribution to load ~32ms ago (u_32) will be weighted
> >> + * approximately half as much as the contribution to load within the last ms
> >> + * (u_0).
> >> + *
> >> + * When a period "rolls over" and we have new u_0`, multiplying the previous
> >> + * sum again by y is sufficient to update:
> >> + *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
> >> + *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
> >> + */
> >> +static __always_inline int
> >> +___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
> >> +		  unsigned long load, unsigned long runnable, int running)
> >> +{
> >> +	u64 delta;
> >> +
> >> +	delta = now - sa->last_update_time;
> >> +	/*
> >> +	 * This should only happen when time goes backwards, which it
> >> +	 * unfortunately does during sched clock init when we swap over to TSC.
> >> +	 */
> >> +	if ((s64)delta < 0) {
> >> +		sa->last_update_time = now;
> >> +		return 0;
> >> +	}
> >> +
> >> +	/*
> >> +	 * Use 1024ns as the unit of measurement since it's a reasonable
> >> +	 * approximation of 1us and fast to compute.
> >> +	 */
> >> +	delta >>= 10;
> >> +	if (!delta)
> >> +		return 0;
> >> +
> >> +	sa->last_update_time += delta << 10;
> >> +
> >> +	/*
> >> +	 * running is a subset of runnable (weight) so running can't be set if
> >> +	 * runnable is clear. But there are some corner cases where the current
> >> +	 * se has been already dequeued but cfs_rq->curr still points to it.
> >> +	 * This means that weight will be 0 but not running for a sched_entity
> >> +	 * but also for a cfs_rq if the latter becomes idle. As an example,
> >> +	 * this happens during idle_balance() which calls
> >> +	 * update_blocked_averages()
> >> +	 */
> >> +	if (!load)
> >> +		runnable = running = 0;
> >> +
> >> +	/*
> >> +	 * Now we know we crossed measurement unit boundaries. The *_avg
> >> +	 * accrues by two steps:
> >> +	 *
> >> +	 * Step 1: accumulate *_sum since last_update_time. If we haven't
> >> +	 * crossed period boundaries, finish.
> >> +	 */
> >> +	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
> >> +		return 0;
> >> +
> >> +	return 1;
> >> +}
> >> +
> >> +static __always_inline void
> >> +___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
> >> +{
> >> +	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
> >> +
> >> +	/*
> >> +	 * Step 2: update *_avg.
> >> +	 */
> >> +	sa->load_avg = div_u64(load * sa->load_sum, divider);
> >> +	sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
> >> +	sa->util_avg = sa->util_sum / divider;
> >> +}
> >> +
> >> +/*
> >> + * sched_entity:
> >> + *
> >> + *   task:
> >> + *     se_runnable() == se_weight()
> >> + *
> >> + *   group: [ see update_cfs_group() ]
> >> + *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
> >> + *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
> >> + *
> >> + *   load_sum := runnable_sum
> >> + *   load_avg = se_weight(se) * runnable_avg
> >> + *
> >> + *   runnable_load_sum := runnable_sum
> >> + *   runnable_load_avg = se_runnable(se) * runnable_avg
> >> + *
> >> + *   XXX collapse load_sum and runnable_load_sum
> >> + *
> >> + * cfq_rs:
> >> + *
> >> + *   load_sum = \Sum se_weight(se) * se->avg.load_sum
> >> + *   load_avg = \Sum se->avg.load_avg
> >> + *
> >> + *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
> >> + *   runnable_load_avg = \Sum se->avg.runable_load_avg
> >> + */
> >> +
> >> +int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
> >> +{
> >> +	if (entity_is_task(se))
> >> +		se->runnable_weight = se->load.weight;
> >> +
> >> +	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
> >> +		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
> >> +		return 1;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
> >> +{
> >> +	if (entity_is_task(se))
> >> +		se->runnable_weight = se->load.weight;
> >> +
> >> +	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
> >> +				cfs_rq->curr == se)) {
> >> +
> >> +		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
> >> +		return 1;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
> >> +{
> >> +	if (___update_load_sum(now, cpu, &cfs_rq->avg,
> >> +			scale_load_down(cfs_rq->load.weight),
> >> +			scale_load_down(cfs_rq->runnable_weight),
> >> +			cfs_rq->curr != NULL)) {
> >> +
> >> +		___update_load_avg(&cfs_rq->avg, 1, 1);
> >> +		return 1;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
> >> new file mode 100644
> >> index 0000000..c312d8c
> >> --- /dev/null
> >> +++ b/kernel/sched/pelt.h
> >> @@ -0,0 +1,17 @@
> >> +#ifdef CONFIG_SMP
> >> +
> >> +int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
> >> +int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se);
> >> +int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
> >> +
> >> +#else
> >> +
> >> +static inline int
> >> +update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
> >> +{
> >> +	return 0;
> >> +}
> >> +
> >> +#endif
> >> +
> >> +
> >> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> >> index 45ab0bf..6fefef6 100644
> >> --- a/kernel/sched/sched.h
> >> +++ b/kernel/sched/sched.h
> >> @@ -528,6 +528,7 @@ struct rt_rq {
> >>  	unsigned long rt_nr_total;
> >>  	int overloaded;
> >>  	struct plist_head pushable_tasks;
> >> +
> >>  #endif /* CONFIG_SMP */
> >>  	int rt_queued;
> >>
> >> @@ -602,7 +603,26 @@ struct dl_rq {
> >>  	u64 bw_ratio;
> >>  };
> >>
> >> +#ifdef CONFIG_FAIR_GROUP_SCHED
> >> +/* An entity is a task if it doesn't "own" a runqueue */
> >> +#define entity_is_task(se)	(!se->my_q)
> >> +#else
> >> +#define entity_is_task(se)	1
> >> +#endif
> >> +
> >>  #ifdef CONFIG_SMP
> >> +/*
> >> + * XXX we want to get rid of these helpers and use the full load resolution.
> >> + */
> >> +static inline long se_weight(struct sched_entity *se)
> >> +{
> >> +	return scale_load_down(se->load.weight);
> >> +}
> >> +
> >> +static inline long se_runnable(struct sched_entity *se)
> >> +{
> >> +	return scale_load_down(se->runnable_weight);
> >> +}
> >>
> >>  static inline bool sched_asym_prefer(int a, int b)
> >>  {
> >> --
> >> 2.7.4
> >>
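
[Editor's note: a minimal two-file userspace sketch of the inlining concern Quentin raises above, using hypothetical file and function names; it is not kernel code and not part of the patch. A static function defined in the same translation unit as its caller can be inlined by the compiler, whereas a function for which only a prototype is visible (as fair.c now sees __update_load_avg_blocked_se() through pelt.h after the move) is generally emitted as an out-of-line call unless link-time optimization is enabled.]

/* caller.c -- stands in for fair.c; build with: cc -O2 caller.c impl.c */
#include <stdio.h>

/* Defined in this translation unit: the compiler may inline it into main(). */
static int update_local(int x)
{
	return x + 1;
}

/* Only the prototype is visible here; the definition lives in impl.c,
 * so without LTO each use below is a real function call. */
int update_external(int x);

int main(void)
{
	printf("%d %d\n", update_local(1), update_external(1));
	return 0;
}

/* impl.c -- stands in for pelt.c */
int update_external(int x)
{
	return x + 1;
}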
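
[Editor's note: the PELT comments quoted above describe the decayed geometric series informally. The following standalone userspace sketch (not kernel code, file and helper names invented for illustration) reproduces the arithmetic with floating point so the y^32 = 0.5 half-life is easy to verify; the kernel's decay_load() computes the same val * y^n with the runnable_avg_yN_inv[] table and shifts instead of pow().]

/* pelt_decay_sketch.c -- build with: cc -O2 pelt_decay_sketch.c -lm */
#include <stdio.h>
#include <math.h>

#define LOAD_AVG_PERIOD	32	/* periods after which a contribution halves: y^32 == 0.5 */

/* val * y^n, the operation decay_load() implements with fixed-point math */
static double decay(double val, unsigned int n)
{
	return val * pow(0.5, (double)n / LOAD_AVG_PERIOD);
}

int main(void)
{
	double sum = 0.0;
	unsigned int n;

	/*
	 * An entity runnable for every one of the last 400 periods
	 * contributes 1024 * y^n for the period that ended n periods ago.
	 */
	for (n = 0; n < 400; n++)
		sum += decay(1024.0, n);

	/*
	 * The geometric series saturates at 1024 / (1 - y), about 47788 in
	 * exact arithmetic; the kernel's fixed-point counterpart of this
	 * limit is the LOAD_AVG_MAX constant used by __accumulate_pelt_segments().
	 */
	printf("saturated load_sum: %.0f\n", sum);

	/* The half-life: a contribution from 32 periods (~32ms) ago counts half. */
	printf("1024 decayed by 32 periods: %.0f\n", decay(1024.0, 32));

	return 0;
}

With y^32 = 0.5, the second printf() yields 512, i.e. exactly half of one fully runnable 1024us period, which is the weighting described in the quoted comment block.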