Message-ID: <CAKfTPtCLYnhQH3Mcx5dhRi9+9_679fdrhm8iZBSpgyu8VmevYw@mail.gmail.com>
Date:	Tue, 4 Jun 2013 13:11:47 +0200
From:	Vincent Guittot <vincent.guittot@...aro.org>
To:	Frederic Weisbecker <fweisbec@...il.com>
Cc:	Peter Zijlstra <peterz@...radead.org>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	"linaro-kernel@...ts.linaro.org" <linaro-kernel@...ts.linaro.org>,
	Ingo Molnar <mingo@...nel.org>
Subject: Re: [PATCH] sched: fix clear NOHZ_BALANCE_KICK

On 4 June 2013 12:26, Frederic Weisbecker <fweisbec@...il.com> wrote:
> On Tue, Jun 04, 2013 at 11:36:11AM +0200, Peter Zijlstra wrote:
>>
>> The best I can seem to come up with is something like the below; but I think
>> it's ghastly. Surely we can do something saner with that bit.
>>
>> Having to clear it at 3 different places is just wrong.
>
> We could clear the flag early in scheduler_ipi() and set some
> specific value in rq->idle_balance that tells the softirq that we
> want nohz idle balancing. Something like this, untested:

I'm not sure we can get below 2 places to clear it: the cancel place
and the acknowledge place. Otherwise we can face a situation where
the idle load balance is triggered twice in a row, because
NOHZ_BALANCE_KICK is cleared before the idle load balance has been
done and had a chance to migrate tasks.
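
To make the window concrete, here is a rough userspace sketch of the
sequence (C11 atomics standing in for nohz_flags(); kick_ilb() and the
other helper names are made up for illustration, not the real kernel
functions):

/*
 * Userspace sketch (not kernel code) of the window I'm worried about.
 * C11 atomics stand in for nohz_flags(); the helper names are made up.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool nohz_balance_kick;   /* stands in for NOHZ_BALANCE_KICK */

/* Kicker side: only send the IPI if the target is not already kicked. */
static bool kick_ilb(void)
{
        /* like test_and_set_bit(): returns the previous value */
        if (atomic_exchange(&nohz_balance_kick, true))
                return false;           /* kick already pending, back off */
        return true;                    /* would send the IPI here */
}

/* Kickee, variant A: clear in the IPI, before the softirq has run. */
static void scheduler_ipi_clears_early(void)
{
        atomic_store(&nohz_balance_kick, false);
        /*
         * Window: until the softirq actually migrates tasks, kick_ilb()
         * succeeds again, so one batch of work gets two idle balances.
         */
}

/* Kickee, variant B: clear only once the balance work has been done. */
static void nohz_idle_balance_clears_late(void)
{
        /* ... walk nohz.idle_cpus_mask and migrate tasks ... */
        atomic_store(&nohz_balance_kick, false);        /* acknowledge */
}

int main(void)
{
        printf("kick #1: %d\n", kick_ilb());    /* 1: kick sent */
        scheduler_ipi_clears_early();
        printf("kick #2: %d\n", kick_ilb());    /* 1 again: double trigger */
        nohz_idle_balance_clears_late();
        return 0;
}

With variant A the second kick gets through while the first balance is
still pending; with variant B the kicker sees the bit still set and
backs off until the work has actually been done.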

>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 58453b8..330136b 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -630,15 +630,14 @@ void wake_up_nohz_cpu(int cpu)
>                 wake_up_idle_cpu(cpu);
>  }
>
> -static inline bool got_nohz_idle_kick(void)
> +static inline bool got_nohz_idle_kick(int cpu)
>  {
> -       int cpu = smp_processor_id();
> -       return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
> +       return test_and_clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
>  }
>
>  #else /* CONFIG_NO_HZ_COMMON */
>
> -static inline bool got_nohz_idle_kick(void)
> +static inline bool got_nohz_idle_kick(int cpu)
>  {
>         return false;
>  }
> @@ -1393,8 +1392,12 @@ static void sched_ttwu_pending(void)
>
>  void scheduler_ipi(void)
>  {
> -       if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
> -           && !tick_nohz_full_cpu(smp_processor_id()))
> +       int cpu = smp_processor_id();
> +       bool idle_kick = got_nohz_idle_kick(cpu);
> +
> +       if (!(idle_kick && idle_cpu(cpu))
> +           && llist_empty(&this_rq()->wake_list)
> +           && !tick_nohz_full_cpu(cpu))
>                 return;
>
>         /*
> @@ -1417,8 +1420,8 @@ void scheduler_ipi(void)
>         /*
>          * Check if someone kicked us for doing the nohz idle load balance.
>          */
> -       if (unlikely(got_nohz_idle_kick() && !need_resched())) {
> -               this_rq()->idle_balance = 1;
> +       if (unlikely(idle_kick && idle_cpu(cpu) && !need_resched())) {
> +               this_rq()->idle_balance = IDLE_NOHZ_BALANCE;
>                 raise_softirq_irqoff(SCHED_SOFTIRQ);
>         }
>         irq_exit();
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c61a614..816e7b0 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5577,15 +5577,14 @@ out:
>   * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
>   * rebalancing for all the cpus for whom scheduler ticks are stopped.
>   */
> -static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
> +static void nohz_idle_balance(int this_cpu)
>  {
>         struct rq *this_rq = cpu_rq(this_cpu);
>         struct rq *rq;
>         int balance_cpu;
>
> -       if (idle != CPU_IDLE ||
> -           !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
> -               goto end;
> +       if (this_rq->idle_balance != IDLE_NOHZ_BALANCE)
> +               return;
>
>         for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
>                 if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
> @@ -5612,8 +5611,12 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
>                         this_rq->next_balance = rq->next_balance;
>         }
>         nohz.next_balance = this_rq->next_balance;
> -end:
> -       clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
> +
> +       /* There could be concurrent updates from irqs but we don't care */
> +       if (idle_cpu(this_cpu))
> +               this_rq->idle_balance = IDLE_BALANCE;
> +       else
> +               this_rq->idle_balance = 0;
>  }
>
>  /*
> @@ -5679,7 +5682,7 @@ need_kick:
>         return 1;
>  }
>  #else
> -static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
> +static void nohz_idle_balance(int this_cpu) { }
>  #endif
>
>  /*
> @@ -5700,7 +5703,7 @@ static void run_rebalance_domains(struct softirq_action *h)
>          * balancing on behalf of the other idle cpus whose ticks are
>          * stopped.
>          */
> -       nohz_idle_balance(this_cpu, idle);
> +       nohz_idle_balance(this_cpu);
>  }
>
>  static inline int on_null_domain(int cpu)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ce39224..e9de976 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -387,6 +387,11 @@ extern struct root_domain def_root_domain;
>
>  #endif /* CONFIG_SMP */
>
> +enum idle_balance_type {
> +       IDLE_BALANCE = 1,
> +       IDLE_NOHZ_BALANCE = 2,
> +};
> +
>  /*
>   * This is the main, per-CPU runqueue data structure.
>   *
> @@ -458,7 +463,7 @@ struct rq {
>
>         unsigned long cpu_power;
>
> -       unsigned char idle_balance;
> +       enum idle_balance_type idle_balance;
>         /* For active balancing */
>         int post_schedule;
>         int active_balance;
