lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20131113150745.25d597fa@gandalf.local.home>
Date:	Wed, 13 Nov 2013 15:07:45 -0500
From:	Steven Rostedt <rostedt@...dmis.org>
To:	Thomas Gleixner <tglx@...utronix.de>
Cc:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Matthew Whitehead <tedheadster@...il.com>,
	john.stultz@...aro.org, linux-kernel@...r.kernel.org,
	mwhitehe@...hat.com
Subject: Re: nohz problem with idle time on old hardware

On Wed, 13 Nov 2013 21:01:57 +0100 (CET)
Thomas Gleixner <tglx@...utronix.de> wrote:


> ---------------->
> Subject: NOHZ: Check for nohz active instead of nohz enabled
> 
> RCU and the fine grained idle time accounting functions check
> tick_nohz_enabled. But that variable is merely telling that NOHZ has
> been enabled in the config and not been disabled on the command line.
> 
> But it does not tell anything about nohz being active. That's what all
> this should check for.
> 
> Matthew reported that the idle accounting on his old P1 machine
> showed bogus values, when he enabled NOHZ in the config and did not
> disable it on the kernel command line. The reason is that his machine
> uses (refined) jiffies as a clocksource which explains why the "fine"
> grained accounting went into lala land, because it depends on when the
> system goes and leaves idle relative to the jiffies increment.
> 
> Provide a tick_nohz_active indicator and let RCU and the accounting
> code use this instead of tick_nohz_enabled.
> 
> Reported-by: Matthew Whitehead <tedheadster@...il.com>

Matthew, can you test this patch to make sure it causes the idle issue
to go away? (Remember to remove nohz=off from your command line.)

Reviewed-by: Steven Rostedt <rostedt@...dmis.org>

-- Steve

> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 3822ac0..da6e6de 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -1632,7 +1632,7 @@ module_param(rcu_idle_gp_delay, int, 0644);
>  static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
>  module_param(rcu_idle_lazy_gp_delay, int, 0644);
>  
> -extern int tick_nohz_enabled;
> +extern int tick_nohz_active;
>  
>  /*
>   * Try to advance callbacks for all flavors of RCU on the current CPU, but
> @@ -1729,7 +1729,7 @@ static void rcu_prepare_for_idle(int cpu)
>  	int tne;
>  
>  	/* Handle nohz enablement switches conservatively. */
> -	tne = ACCESS_ONCE(tick_nohz_enabled);
> +	tne = ACCESS_ONCE(tick_nohz_active);
>  	if (tne != rdtp->tick_nohz_enabled_snap) {
>  		if (rcu_cpu_has_callbacks(cpu, NULL))
>  			invoke_rcu_core(); /* force nohz to see update. */
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index 3612fc7..a12df5a 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -361,8 +361,8 @@ void __init tick_nohz_init(void)
>  /*
>   * NO HZ enabled ?
>   */
> -int tick_nohz_enabled __read_mostly  = 1;
> -
> +static int tick_nohz_enabled __read_mostly  = 1;
> +int tick_nohz_active  __read_mostly;
>  /*
>   * Enable / Disable tickless mode
>   */
> @@ -465,7 +465,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
>  	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
>  	ktime_t now, idle;
>  
> -	if (!tick_nohz_enabled)
> +	if (!tick_nohz_active)
>  		return -1;
>  
>  	now = ktime_get();
> @@ -506,7 +506,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
>  	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
>  	ktime_t now, iowait;
>  
> -	if (!tick_nohz_enabled)
> +	if (!tick_nohz_active)
>  		return -1;
>  
>  	now = ktime_get();
> @@ -799,11 +799,6 @@ void tick_nohz_idle_enter(void)
>  	local_irq_disable();
>  
>  	ts = &__get_cpu_var(tick_cpu_sched);
> -	/*
> -	 * set ts->inidle unconditionally. even if the system did not
> -	 * switch to nohz mode the cpu frequency governers rely on the
> -	 * update of the idle time accounting in tick_nohz_start_idle().
> -	 */
>  	ts->inidle = 1;
>  	__tick_nohz_idle_enter(ts);
>  
> @@ -973,7 +968,7 @@ static void tick_nohz_switch_to_nohz(void)
>  	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
>  	ktime_t next;
>  
> -	if (!tick_nohz_enabled)
> +	if (!tick_nohz_active)
>  		return;
>  
>  	local_irq_disable();
> @@ -981,7 +976,7 @@ static void tick_nohz_switch_to_nohz(void)
>  		local_irq_enable();
>  		return;
>  	}
> -
> +	tick_nohz_active = 1;
>  	ts->nohz_mode = NOHZ_MODE_LOWRES;
>  
>  	/*
> @@ -1139,8 +1134,10 @@ void tick_setup_sched_timer(void)
>  	}
>  
>  #ifdef CONFIG_NO_HZ_COMMON
> -	if (tick_nohz_enabled)
> +	if (tick_nohz_enabled) {
>  		ts->nohz_mode = NOHZ_MODE_HIGHRES;
> +		tick_nohz_active = 1;
> +	}
>  #endif
>  }
>  #endif /* HIGH_RES_TIMERS */

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ