lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 6 Apr 2010 22:45:37 +0200
From:	Dominik Brodowski <linux@...inikbrodowski.net>
To:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc:	Alan Stern <stern@...land.harvard.edu>,
	linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...e.hu>,
	Peter Zijlstra <peterz@...radead.org>,
	Arjan van de Ven <arjan@...ux.intel.com>,
	Dmitry Torokhov <dtor@...l.ru>
Subject: Re: A few questions and issues with dynticks, NOHZ and powertop

On Mon, Apr 05, 2010 at 03:31:30PM -0700, Paul E. McKenney wrote:
> > > 5) powertop and hrtimer_start_range_ns (tick_sched_timer) on a SMP kernel
> > > booted with "nosmp":
> > > 
> > > Wakeups-from-idle per second :  9.9     interval: 15.0s
> > > ...
> > >   48.5% (  9.4)     <kernel core> : hrtimer_start (tick_sched_timer) 
> > >   26.1% (  5.1)     <kernel core> : cursor_timer_handler
> > >   (cursor_timer_handle
> > >   20.6% (  4.0)     <kernel core> : usb_hcd_poll_rh_status (rh_timer_func) 
> > >    1.0% (  0.2)     <kernel core> : arm_supers_timer
> > >   (sync_supers_timer_fn) 
> > >    0.7% (  0.1)       <interrupt> : ata_piix 
> > >    ...
> > > 
> > > Accoding to http://www.linuxpowertop.org , the count in the brackets is
> > > how
> > > many wakeups per seconds were caused by one source. Adding all _except_
> > >   48.5% (  9.4)     <kernel core> : hrtimer_start (tick_sched_timer)
> > > up leads to the 9.9.
> 
> OK, so you further instrumented the hrtimer_sched_timer (or was it
> tick_sched_timer?) to find the number that you were attributing to
> rcu_needs_cpu()?

That's what I did -- to tick_nohz_stop_sched_tick(), to see why the
tick_sched_timer did not get stopped. Or why powertop thinks it did not
get stopped...

Patch below (works only for 1 or 2 CPUs) for all interested parties, 
_NOT_ intended for submission, though ;)

0x0001: CPU #1
0x0002: inidle
0x0004: not inidle
0x0008: INACTIVE
0x0010: need_resched
0x0020: softirq
0x0040: rcu_needs_
0x0080: printk_needs_
0x0100: arch_needs_
0x0200: off-by-one 
0x0400: skip
0x0800: loadbalance:
0x1000: cancel timer (SUCCESS)
0x2000: timer was already in the past
0x4000: hrtimer_start() (SUCCESS)
0x8000: stop_tick (SUCCESS)

Fixing USB-autosuspend and disabling the fbcon cursor, I'm down to
~0.9 wakeups-per-second booting with "nosmp".

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f992762..6ba7bae 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -210,19 +210,35 @@ void tick_nohz_stop_sched_tick(int inidle)
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	u64 time_delta;
 	int cpu;
+	static int testrun_counter = 0;
+	int testrun = 0;
+	u64 debugdata = 0;
+
+	testrun_counter++;
+
+	if ((testrun_counter % 123) == 0)
+		testrun = 1;
 
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
+	debugdata = cpu;
+
+	if (inidle)
+		debugdata |= 0x2;
+
+
 	/*
 	 * Call to tick_nohz_start_idle stops the last_update_time from being
 	 * updated. Thus, it must not be called in the event we are called from
 	 * irq_exit() with the prior state different than idle.
 	 */
-	if (!inidle && !ts->inidle)
+	if (!inidle && !ts->inidle) {
+		debugdata |= 0x4;
 		goto end;
+	}
 
 	/*
 	 * Set ts->inidle unconditionally. Even if the system did not
@@ -245,11 +261,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 	}
 
-	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
+		debugdata |= 0x8;		
 		goto end;
+	}
 
-	if (need_resched())
+	if (need_resched()) {
+		debugdata |= 0x10;
 		goto end;
+	}
 
 	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
 		static int ratelimit;
@@ -259,6 +279,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 			       (unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
+		debugdata |= 0x20;
 		goto end;
 	}
 
@@ -275,6 +296,12 @@ void tick_nohz_stop_sched_tick(int inidle)
 	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
+		if (rcu_needs_cpu(cpu))
+			debugdata |= 0x40;
+		if (printk_needs_cpu(cpu))
+			debugdata |= 0x80;
+		if (arch_needs_cpu(cpu))
+			debugdata |= 0x100;
 	} else {
 		/* Get the next timer wheel timer */
 		next_jiffies = get_next_timer_interrupt(last_jiffies);
@@ -284,8 +311,10 @@ void tick_nohz_stop_sched_tick(int inidle)
 	 * Do not stop the tick, if we are only one off
 	 * or if the cpu is required for rcu
 	 */
-	if (!ts->tick_stopped && delta_jiffies == 1)
+	if (!ts->tick_stopped && delta_jiffies == 1) {
+		debugdata |= 0x200;
 		goto out;
+	}
 
 	/* Schedule the tick, if we are at least one jiffie off */
 	if ((long)delta_jiffies >= 1) {
@@ -341,8 +370,10 @@ void tick_nohz_stop_sched_tick(int inidle)
 			cpumask_set_cpu(cpu, nohz_cpu_mask);
 
 		/* Skip reprogram of event if its not changed */
-		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
+		if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) {
+			debugdata |= 0x400;
 			goto out;
+		}
 
 		/*
 		 * nohz_stop_sched_tick can be called several times before
@@ -357,8 +388,10 @@ void tick_nohz_stop_sched_tick(int inidle)
 				 * sched tick not stopped!
 				 */
 				cpumask_clear_cpu(cpu, nohz_cpu_mask);
+				debugdata |= 0x800;
 				goto out;
 			}
+			debugdata |= 0x8000;
 
 			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
 			ts->tick_stopped = 1;
@@ -375,7 +408,8 @@ void tick_nohz_stop_sched_tick(int inidle)
 		 * If the expiration time == KTIME_MAX, then
 		 * in this case we simply stop the tick timer.
 		 */
-		 if (unlikely(expires.tv64 == KTIME_MAX)) {
+		if (unlikely(expires.tv64 == KTIME_MAX)) {
+			debugdata |= 0x1000;
 			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 				hrtimer_cancel(&ts->sched_timer);
 			goto out;
@@ -384,9 +418,13 @@ void tick_nohz_stop_sched_tick(int inidle)
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
 				      HRTIMER_MODE_ABS_PINNED);
+			debugdata |= 0x2000;
 			/* Check, if the timer was already in the past */
-			if (hrtimer_active(&ts->sched_timer))
+			if (hrtimer_active(&ts->sched_timer)) {
+				debugdata &= ~(0x2000);
+				debugdata |= 0x4000;
 				goto out;
+			}
 		} else if (!tick_program_event(expires, 0))
 				goto out;
 		/*
@@ -403,6 +441,8 @@ out:
 	ts->last_jiffies = last_jiffies;
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
+	if (testrun)
+		printk(KERN_DEBUG "0x%0llx\n", debugdata);
 	local_irq_restore(flags);
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ