Message-ID: <20080630180702.GC23606@balbir.in.ibm.com>
Date:	Mon, 30 Jun 2008 23:37:02 +0530
From:	Balbir Singh <balbir@...ux.vnet.ibm.com>
To:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...e.hu>,
	Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>,
	Mike Galbraith <efault@....de>
Subject: Re: [PATCH 02/30] sched: revert the revert of: weight calculations

* Peter Zijlstra <a.p.zijlstra@...llo.nl> [2008-06-27 13:41:11]:

> Try again..
> 
> initial commit: 8f1bc385cfbab474db6c27b5af1e439614f3025c
> revert: f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> ---
> 
> ---
>  kernel/sched.c          |    9 +---
>  kernel/sched_fair.c     |  105 ++++++++++++++++++++++++++++++++----------------
>  kernel/sched_features.h |    1 
>  3 files changed, 76 insertions(+), 39 deletions(-)
> 
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -1342,6 +1342,9 @@ static void __resched_task(struct task_s
>   */
>  #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
> 
> +/*
> + * delta *= weight / lw
> + */
>  static unsigned long
>  calc_delta_mine(unsigned long delta_exec, unsigned long weight,
>  		struct load_weight *lw)
> @@ -1369,12 +1372,6 @@ calc_delta_mine(unsigned long delta_exec
>  	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
>  }
> 
> -static inline unsigned long
> -calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
> -{
> -	return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
> -}
> -
>  static inline void update_load_add(struct load_weight *lw, unsigned long inc)
>  {
>  	lw->weight += inc;
> Index: linux-2.6/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched_fair.c
> +++ linux-2.6/kernel/sched_fair.c
> @@ -334,6 +334,34 @@ int sched_nr_latency_handler(struct ctl_
>  #endif
> 
>  /*
> + * delta *= w / rw
> + */
> +static inline unsigned long
> +calc_delta_weight(unsigned long delta, struct sched_entity *se)
> +{
> +	for_each_sched_entity(se) {
> +		delta = calc_delta_mine(delta,
> +				se->load.weight, &cfs_rq_of(se)->load);
> +	}
> +
> +	return delta;
> +}
> +
> +/*
> + * delta *= rw / w
> + */
> +static inline unsigned long
> +calc_delta_fair(unsigned long delta, struct sched_entity *se)
> +{
> +	for_each_sched_entity(se) {
> +		delta = calc_delta_mine(delta,
> +				cfs_rq_of(se)->load.weight, &se->load);
> +	}
> +
> +	return delta;
> +}
> +

These functions could do with better comments.

delta is scaled up as we move up the hierarchy

Why is calc_delta_weight() different from calc_delta_fair()?
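
To convince myself what each one does, I sketched the two scalings as a
standalone toy (plain C, made-up weights, no calc_delta_mine() fixed
point; names like scale_by_weight() are mine, not from the patch):

#include <stdio.h>

/*
 * Toy model of calc_delta_weight() vs calc_delta_fair(), walking a
 * two-level hierarchy (task inside a group inside the root cfs_rq).
 * Plain 64-bit math stands in for the inv_weight fixed point.
 */
struct level { unsigned long se_weight, rq_weight; };

/* delta *= w / rw at every level, as calc_delta_weight() does */
static unsigned long scale_by_weight(unsigned long delta,
				     const struct level *h, int depth)
{
	for (int i = 0; i < depth; i++)
		delta = (unsigned long)((unsigned long long)delta *
					h[i].se_weight / h[i].rq_weight);
	return delta;
}

/* delta *= rw / w at every level, as calc_delta_fair() does */
static unsigned long scale_by_inverse_weight(unsigned long delta,
					     const struct level *h, int depth)
{
	for (int i = 0; i < depth; i++)
		delta = (unsigned long)((unsigned long long)delta *
					h[i].rq_weight / h[i].se_weight);
	return delta;
}

int main(void)
{
	/* nice-0 entity owning half of its queue at both levels */
	struct level h[] = {
		{ .se_weight = 1024, .rq_weight = 2048 },	/* task level  */
		{ .se_weight = 1024, .rq_weight = 2048 },	/* group level */
	};
	unsigned long period = 20000000UL;			/* 20ms in ns */

	printf("delta *= w/rw  (sched_slice):    %lu\n",
	       scale_by_weight(period, h, 2));
	printf("delta *= rw/w  (__update_curr):  %lu\n",
	       scale_by_inverse_weight(period, h, 2));
	return 0;
}

With those numbers the first one shrinks the period to the entity's
share (20ms -> 5ms), while the second blows the delta up for entities
lighter than their queue (20ms -> 80ms), which is what sched_slice()
and __update_curr() respectively seem to want. Spelling that out in
the comments would answer both questions above.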

> +/*
>   * The idea is to set a period in which each task runs once.
>   *
>   * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
> @@ -362,47 +390,54 @@ static u64 __sched_period(unsigned long 
>   */
>  static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  {
> -	u64 slice = __sched_period(cfs_rq->nr_running);
> -
> -	for_each_sched_entity(se) {
> -		cfs_rq = cfs_rq_of(se);
> -
> -		slice *= se->load.weight;
> -		do_div(slice, cfs_rq->load.weight);
> -	}
> -
> -
> -	return slice;
> +	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
>  }
> 
>  /*
>   * We calculate the vruntime slice of a to be inserted task
>   *
> - * vs = s/w = p/rw
> + * vs = s*rw/w = p
>   */
>  static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  {
>  	unsigned long nr_running = cfs_rq->nr_running;
> -	unsigned long weight;
> -	u64 vslice;
> 
>  	if (!se->on_rq)
>  		nr_running++;
> 
> -	vslice = __sched_period(nr_running);
> +	return __sched_period(nr_running);

Do we always return a constant value based on nr_running? Am I
misreading the diff by any chance?
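
For what it's worth, my attempt at the algebra in the comment above
(assuming the 's' there is what sched_slice() now computes):

	s  = p * w / rw			(sched_slice() via calc_delta_weight())
	vs = s * rw / w
	   = (p * w / rw) * rw / w
	   = p				(hence just __sched_period())

so the constant-looking return may well be intentional, though it only
works out if the per-level hierarchy factors cancel the same way --
worth confirming.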

> +}
> +
> +/*
> + * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
> + * that it favours >=0 over <0.
> + *
> + *   -20         |
> + *               |
> + *     0 --------+-------
> + *             .'
> + *    19     .'
> + *
> + */
> +static unsigned long
> +calc_delta_asym(unsigned long delta, struct sched_entity *se)
> +{
> +	struct load_weight lw = {
> +		.weight = NICE_0_LOAD,
> +		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
> +	};

Could you please explain this? weight is 1 << 10 and
inv_weight is 1 << 22.


> 
>  	for_each_sched_entity(se) {
> -		cfs_rq = cfs_rq_of(se);
> +		struct load_weight *se_lw = &se->load;
> 
> -		weight = cfs_rq->load.weight;
> -		if (!se->on_rq)
> -			weight += se->load.weight;
> +		if (se->load.weight < NICE_0_LOAD)
> +			se_lw = &lw;

Why do we do this?

> 
> -		vslice *= NICE_0_LOAD;
> -		do_div(vslice, weight);
> +		delta = calc_delta_mine(delta,
> +				cfs_rq_of(se)->load.weight, se_lw);
>  	}
> 
> -	return vslice;
> +	return delta;
>  }
> 
>  /*
> @@ -419,11 +454,7 @@ __update_curr(struct cfs_rq *cfs_rq, str
> 
>  	curr->sum_exec_runtime += delta_exec;
>  	schedstat_add(cfs_rq, exec_clock, delta_exec);
> -	delta_exec_weighted = delta_exec;
> -	if (unlikely(curr->load.weight != NICE_0_LOAD)) {
> -		delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
> -							&curr->load);
> -	}
> +	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
>  	curr->vruntime += delta_exec_weighted;
>  }
> 
> @@ -609,8 +640,17 @@ place_entity(struct cfs_rq *cfs_rq, stru
> 
>  	if (!initial) {
>  		/* sleeps upto a single latency don't count. */
> -		if (sched_feat(NEW_FAIR_SLEEPERS))
> -			vruntime -= sysctl_sched_latency;
> +		if (sched_feat(NEW_FAIR_SLEEPERS)) {
> +			unsigned long thresh = sysctl_sched_latency;
> +
> +			/*
> +			 * convert the sleeper threshold into virtual time
> +			 */
> +			if (sched_feat(NORMALIZED_SLEEPER))
> +				thresh = calc_delta_fair(thresh, se);
> +
> +			vruntime -= thresh;
> +		}
> 
>  		/* ensure we never gain time by being placed backwards. */
>  		vruntime = max_vruntime(se->vruntime, vruntime);
> @@ -1111,11 +1151,10 @@ static unsigned long wakeup_gran(struct 
>  	unsigned long gran = sysctl_sched_wakeup_granularity;
> 
>  	/*
> -	 * More easily preempt - nice tasks, while not making
> -	 * it harder for + nice tasks.
> +	 * More easily preempt - nice tasks, while not making it harder for
> +	 * + nice tasks.
>  	 */
> -	if (unlikely(se->load.weight > NICE_0_LOAD))
> -		gran = calc_delta_fair(gran, &se->load);
> +	gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
> 
>  	return gran;
>  }
> Index: linux-2.6/kernel/sched_features.h
> ===================================================================
> --- linux-2.6.orig/kernel/sched_features.h
> +++ linux-2.6/kernel/sched_features.h
> @@ -1,4 +1,5 @@
>  SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
> +SCHED_FEAT(NORMALIZED_SLEEPER, 1)
>  SCHED_FEAT(WAKEUP_PREEMPT, 1)
>  SCHED_FEAT(START_DEBIT, 1)
>  SCHED_FEAT(AFFINE_WAKEUPS, 1)

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL
