[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtD4RgqR4um3faHeR5AC2Uw5+cbH6vee4wq-5Qox9bqwQA@mail.gmail.com>
Date: Fri, 15 Oct 2021 20:02:01 +0200
From: Vincent Guittot <vincent.guittot@...aro.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>, Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Daniel Bristot de Oliveira <bristot@...hat.com>,
linux-kernel <linux-kernel@...r.kernel.org>,
Tim Chen <tim.c.chen@...ux.intel.com>
Subject: Re: [PATCH v2 3/4] sched/fair: Wait before decaying max_newidle_lb_cost
On Fri, 15 Oct 2021 at 19:41, Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Fri, Oct 15, 2021 at 02:46:53PM +0200, Vincent Guittot wrote:
> > Decay max_newidle_lb_cost only when it has not been updated for a while
> > and ensure to not decay a recently changed value.
>
> I was thinking more of something along these lines; of course, I have no
> idea how well it actually behaves.
>
> Index: linux-2.6/include/linux/sched/topology.h
> ===================================================================
> --- linux-2.6.orig/include/linux/sched/topology.h
> +++ linux-2.6/include/linux/sched/topology.h
> @@ -98,7 +98,6 @@ struct sched_domain {
>
> /* idle_balance() stats */
> u64 max_newidle_lb_cost;
> - unsigned long next_decay_max_lb_cost;
>
> u64 avg_scan_cost; /* select_idle_sibling */
>
> Index: linux-2.6/kernel/sched/fair.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched/fair.c
> +++ linux-2.6/kernel/sched/fair.c
> @@ -10241,6 +10241,17 @@ void update_max_interval(void)
> }
>
> /*
> + * Asymmetric IIR filter, 1/4th down, 3/4th up.
> + */
> +static void update_newidle_cost(u64 *cost, u64 new)
> +{
> + s64 diff = new - *cost;
> + if (diff > 0)
> + diff *= 3;
> + *cost += diff / 4;
> +}
I tried something similar, based on update_avg(), but it caused some
performance regressions.
Some regressions were linked to not jumping to the new max directly: I
assume load balancing was started at some levels even though it would
take too much time.
Other regressions happened when the decay was too quick.
> +
> +/*
> * It checks each scheduling domain to see if it is due to be balanced,
> * and initiates a balancing operation if so.
> *
> @@ -10256,33 +10267,18 @@ static void rebalance_domains(struct rq
> /* Earliest time when we have to do rebalance again */
> unsigned long next_balance = jiffies + 60*HZ;
> int update_next_balance = 0;
> - int need_serialize, need_decay = 0;
> - u64 max_cost = 0;
> + int need_serialize;
>
> rcu_read_lock();
> for_each_domain(cpu, sd) {
> - /*
> - * Decay the newidle max times here because this is a regular
> - * visit to all the domains. Decay ~1% per second.
> - */
> - if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
> - sd->max_newidle_lb_cost =
> - (sd->max_newidle_lb_cost * 253) / 256;
> - sd->next_decay_max_lb_cost = jiffies + HZ;
> - need_decay = 1;
> - }
> - max_cost += sd->max_newidle_lb_cost;
>
> /*
> * Stop the load balance at this level. There is another
> * CPU in our sched group which is doing load balancing more
> * actively.
> */
> - if (!continue_balancing) {
> - if (need_decay)
> - continue;
> + if (!continue_balancing)
> break;
> - }
>
> interval = get_sd_balance_interval(sd, busy);
>
> @@ -10313,14 +10309,7 @@ out:
> update_next_balance = 1;
> }
> }
> - if (need_decay) {
> - /*
> - * Ensure the rq-wide value also decays but keep it at a
> - * reasonable floor to avoid funnies with rq->avg_idle.
> - */
> - rq->max_idle_balance_cost =
> - max((u64)sysctl_sched_migration_cost, max_cost);
> - }
> +
> rcu_read_unlock();
>
> /*
> @@ -10909,8 +10898,7 @@ static int newidle_balance(struct rq *th
>
> t1 = sched_clock_cpu(this_cpu);
> domain_cost = t1 - t0;
> - if (domain_cost > sd->max_newidle_lb_cost)
> - sd->max_newidle_lb_cost = domain_cost;
> + update_newidle_cost(&sd->max_newidle_lb_cost, domain_cost);
>
> curr_cost += domain_cost;
> t0 = t1;
> @@ -10930,8 +10918,7 @@ static int newidle_balance(struct rq *th
>
> raw_spin_rq_lock(this_rq);
>
> - if (curr_cost > this_rq->max_idle_balance_cost)
> - this_rq->max_idle_balance_cost = curr_cost;
> + update_newidle_cost(&this_rq->max_idle_balance_cost, curr_cost);
>
> /*
> * While browsing the domains, we released the rq lock, a task could
>
Powered by blists - more mailing lists