[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1212844084.19205.85.camel@lappy.programming.kicks-ass.net>
Date: Sat, 07 Jun 2008 15:08:04 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Mike Galbraith <efault@....de>
Cc: Greg Smith <gsmith@...gsmith.com>, Ingo Molnar <mingo@...e.hu>,
Dhaval Giani <dhaval@...ux.vnet.ibm.com>,
lkml <linux-kernel@...r.kernel.org>,
Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>
Subject: Re: [patch] Re: PostgreSQL pgbench performance regression in
2.6.23+
On Sat, 2008-06-07 at 13:38 +0200, Mike Galbraith wrote:
Interesting.. Looks good.
> Index: linux-2.6.26.git/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.26.git.orig/kernel/sched_fair.c
> +++ linux-2.6.26.git/kernel/sched_fair.c
> @@ -664,6 +664,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
>
> update_stats_dequeue(cfs_rq, se);
> if (sleep) {
> + se->last_preempter = NULL;
> update_avg_stats(cfs_rq, se);
> #ifdef CONFIG_SCHEDSTATS
> if (entity_is_task(se)) {
> @@ -692,8 +693,10 @@ check_preempt_tick(struct cfs_rq *cfs_rq
>
> ideal_runtime = sched_slice(cfs_rq, curr);
> delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
> - if (delta_exec > ideal_runtime)
> + if (delta_exec > ideal_runtime) {
> + curr->last_preempter = NULL;
> resched_task(rq_of(cfs_rq)->curr);
> + }
> }
>
> static void
> @@ -994,6 +997,7 @@ wake_affine(struct rq *rq, struct sched_
> unsigned int imbalance)
> {
> struct task_struct *curr = this_rq->curr;
> + struct sched_entity *se = &curr->se, *pse = &p->se;
> unsigned long tl = this_load;
> unsigned long tl_per_task;
> int balanced;
> @@ -1002,14 +1006,26 @@ wake_affine(struct rq *rq, struct sched_
> return 0;
>
> /*
> + * If the current task is being wakeup preempted by multiple tasks
> + * that it awakened, such that it can't get significant work done
> + * between preemptions, try to spread these preemption sources.
> + */
> + if (sync && se->last_preempter && se->last_preempter != pse) {
> + u64 se_last_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> +
> + if (se_last_exec < sysctl_sched_migration_cost)
> + return 0;
> + }
> +
> + /*
> * If sync wakeup then subtract the (maximum possible)
> * effect of the currently running task from the load
> * of the current CPU:
> */
> if (sync)
> - tl -= current->se.load.weight;
> + tl -= se->load.weight;
>
> - balanced = 100*(tl + p->se.load.weight) <= imbalance*load;
> + balanced = 100*(tl + pse->load.weight) <= imbalance*load;
>
> /*
> * If the currently running task will sleep within
> @@ -1017,8 +1033,8 @@ wake_affine(struct rq *rq, struct sched_
> * woken task:
> */
> if (sync && balanced && curr->sched_class == &fair_sched_class) {
> - if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
> - p->se.avg_overlap < sysctl_sched_migration_cost)
> + if (se->avg_overlap < sysctl_sched_migration_cost &&
> + pse->avg_overlap < sysctl_sched_migration_cost)
> return 1;
> }
>
> @@ -1219,8 +1235,27 @@ static void check_preempt_wakeup(struct
> pse = parent_entity(pse);
> }
>
> - if (wakeup_preempt_entity(se, pse) == 1)
> - resched_task(curr);
> + if (wakeup_preempt_entity(se, pse) == 1) {
> + int preempt = 1;
> +
> + /*
> + * If current task is being preempted by multiple wakees,
> + * tag it for 1:N affine wakeup preemption avoidance.
> + */
> + if (se->last_preempter && se->last_preempter != pse &&
> + se->load.weight >= pse->load.weight) {
> + u64 exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> +
> + if (exec < sysctl_sched_migration_cost)
> + preempt = 0;
> + }
> +
> + if (se == &current->se)
> + se->last_preempter = pse;
> +
> + if (preempt)
> + resched_task(curr);
> + }
> }
>
> static struct task_struct *pick_next_task_fair(struct rq *rq)
> Index: linux-2.6.26.git/include/linux/sched.h
> ===================================================================
> --- linux-2.6.26.git.orig/include/linux/sched.h
> +++ linux-2.6.26.git/include/linux/sched.h
> @@ -963,6 +963,7 @@ struct sched_entity {
>
> u64 last_wakeup;
> u64 avg_overlap;
> + struct sched_entity *last_preempter;
>
> #ifdef CONFIG_SCHEDSTATS
> u64 wait_start;
> Index: linux-2.6.26.git/kernel/sched.c
> ===================================================================
> --- linux-2.6.26.git.orig/kernel/sched.c
> +++ linux-2.6.26.git/kernel/sched.c
> @@ -2176,6 +2176,7 @@ static void __sched_fork(struct task_str
> p->se.prev_sum_exec_runtime = 0;
> p->se.last_wakeup = 0;
> p->se.avg_overlap = 0;
> + p->se.last_preempter = NULL;
>
> #ifdef CONFIG_SCHEDSTATS
> p->se.wait_start = 0;
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists