lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1292858662-5650-11-git-send-email-fweisbec@gmail.com>
Date:	Mon, 20 Dec 2010 16:24:17 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	LKML <linux-kernel@...r.kernel.org>
Cc:	LKML <linux-kernel@...r.kernel.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Ingo Molnar <mingo@...e.hu>,
	Steven Rostedt <rostedt@...dmis.org>,
	Lai Jiangshan <laijs@...fujitsu.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Anton Blanchard <anton@....ibm.com>,
	Tim Pepper <lnxninja@...ux.vnet.ibm.com>
Subject: [RFC PATCH 10/15] nohz_task: Enter in extended quiescent state when in userspace

A nohz task can safely enter an extended quiescent state when it
goes into userspace. This avoids having a remote CPU force the
nohz task to be interrupted just to report quiescent states.

We enter into an extended quiescent state when:

- A nohz task resumes to userspace and is running alone on the
CPU (we check whether the local CPU is in nohz mode, which means
no other task competes on that CPU). If the tick is still running,
then entering the extended QS will be done later, from the second
case:

- The tick stops and we verify that the current task is a nohz one,
is running alone on the CPU and is running in userspace.

We exit the extended quiescent state when:

- A nohz task enters the kernel and is running alone on the CPU.
Again, we determine this by checking whether the local CPU is in
nohz mode. If the tick is still running, it means we are not in an
extended QS and we don't do anything.

- The tick restarts because a new task is enqueued.

Whether the nohz task is in userspace or not is tracked by the
per cpu nohz_task_ext_qs variable.

Architectures need to provide a backend that notifies userspace
exit/entry in order to support this mode.
They need to implement the TIF_NOHZ flag, which switches syscalls
to the slow path, and to notify exception entry/exit.

We don't need to handle IRQs or NMIs, as those are already handled
by RCU through the rcu_irq_enter()/rcu_nmi_enter() helpers.

Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@...e.hu>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Lai Jiangshan <laijs@...fujitsu.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Anton Blanchard <anton@....ibm.com>
Cc: Tim Pepper <lnxninja@...ux.vnet.ibm.com>
---
 arch/Kconfig             |    4 +++
 include/linux/tick.h     |   16 ++++++++++-
 kernel/sched.c           |    3 ++
 kernel/time/tick-sched.c |   61 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index e631791..d1ebea3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -177,5 +177,9 @@ config HAVE_ARCH_JUMP_LABEL
 
 config HAVE_NO_HZ_TASK
 	bool
+	help
+	  Features necessary hooks for a task wanting to enter nohz
+	  while running alone on a CPU: thread flag for syscall hooks
+	  and exceptions entry/exit hooks.
 
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7465a47..a704bb7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,7 @@
 
 #include <linux/clockchips.h>
 #include <linux/percpu-defs.h>
+#include <asm/ptrace.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -130,10 +131,21 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 
 #ifdef CONFIG_NO_HZ_TASK
 DECLARE_PER_CPU(int, task_nohz_mode);
+DECLARE_PER_CPU(int, nohz_task_ext_qs);
+
+extern void tick_nohz_task_enter_kernel(void);
+extern void tick_nohz_task_exit_kernel(void);
+extern void tick_nohz_task_enter_exception(struct pt_regs *regs);
+extern void tick_nohz_task_exit_exception(struct pt_regs *regs);
 extern int tick_nohz_task_mode(void);
-#else
+
+#else /* !NO_HZ_TASK */
+static inline void tick_nohz_task_enter_kernel(void) { }
+static inline void tick_nohz_task_exit_kernel(void) { }
+static inline void tick_nohz_task_enter_exception(struct pt_regs *regs) { }
+static inline void tick_nohz_task_exit_exception(struct pt_regs *regs) { }
 static inline int tick_nohz_task_mode(void) { return 0; }
-#endif
+#endif /* !NO_HZ_TASK */
 
 # else /* !NO_HZ */
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
diff --git a/kernel/sched.c b/kernel/sched.c
index b99f192..4412493 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2464,6 +2464,9 @@ static void nohz_task_cpu_update(void *unused)
 	if (rq->nr_running > 1 || rcu_pending(cpu) || rcu_needs_cpu(cpu)) {
 		__get_cpu_var(task_nohz_mode) = 0;
 		tick_nohz_restart_sched_tick();
+
+		if (__get_cpu_var(nohz_task_ext_qs))
+			rcu_exit_nohz();
 	}
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 88011b9..9a4aa39 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -720,6 +720,62 @@ void tick_check_idle(int cpu)
 }
 
 #ifdef CONFIG_NO_HZ_TASK
+DEFINE_PER_CPU(int, nohz_task_ext_qs);
+
+void tick_nohz_task_exit_kernel(void)
+{
+	unsigned long flags;
+
+	if (!test_thread_flag(TIF_NOHZ))
+		return;
+
+	local_irq_save(flags);
+
+	__get_cpu_var(nohz_task_ext_qs) = 1;
+	/*
+	 * Only enter extended QS if the tick is not running.
+	 * Otherwise the tick will handle that later when it
+	 * will decide to stop.
+	 */
+	if (__get_cpu_var(task_nohz_mode))
+		rcu_enter_nohz();
+
+	local_irq_restore(flags);
+}
+
+void tick_nohz_task_enter_kernel(void)
+{
+	unsigned long flags;
+
+	if (!test_thread_flag(TIF_NOHZ))
+		return;
+
+	local_irq_save(flags);
+
+	__get_cpu_var(nohz_task_ext_qs) = 0;
+	/*
+	 * If the tick was running, then we weren't in
+	 * rcu extended period. Only exit extended QS
+	 * if we were in such state.
+	 */
+	if (__get_cpu_var(task_nohz_mode))
+		rcu_exit_nohz();
+
+	local_irq_restore(flags);
+}
+
+void tick_nohz_task_enter_exception(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		tick_nohz_task_enter_kernel();
+}
+
+void tick_nohz_task_exit_exception(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		tick_nohz_task_exit_kernel();
+}
+
 int tick_nohz_task_mode(void)
 {
 	return __get_cpu_var(task_nohz_mode);
@@ -730,8 +786,11 @@ static void tick_nohz_task_stop_tick(void)
 	if (!test_thread_flag(TIF_NOHZ) || __get_cpu_var(task_nohz_mode))
 		return;
 
-	if (nohz_task_can_stop_tick())
+	if (nohz_task_can_stop_tick()) {
 		__get_cpu_var(task_nohz_mode) = 1;
+		if (__get_cpu_var(nohz_task_ext_qs))
+			rcu_enter_nohz();
+	}
 }
 #else
 static void tick_nohz_task_stop_tick(void) { }
-- 
1.7.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ