lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1332338318-5958-33-git-send-email-fweisbec@gmail.com>
Date:	Wed, 21 Mar 2012 14:58:37 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	LKML <linux-kernel@...r.kernel.org>,
	linaro-sched-sig@...ts.linaro.org
Cc:	Frederic Weisbecker <fweisbec@...il.com>,
	Alessio Igor Bogani <abogani@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Avi Kivity <avi@...hat.com>,
	Chris Metcalf <cmetcalf@...era.com>,
	Christoph Lameter <cl@...ux.com>,
	Daniel Lezcano <daniel.lezcano@...aro.org>,
	Geoff Levand <geoff@...radead.org>,
	Gilad Ben Yossef <gilad@...yossef.com>,
	Ingo Molnar <mingo@...nel.org>,
	Max Krasnyansky <maxk@...lcomm.com>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Stephen Hemminger <shemminger@...tta.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Sven-Thorsten Dietrich <thebigcorporation@...il.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Zen Lin <zen@...nhuawei.org>
Subject: [PATCH 31/32] nohz: Exit RCU idle mode when we schedule before resuming userspace

When a CPU running tickless resumes userspace, it enters RCU idle
mode. But if we are preempted on kernel exit through an explicit call
to schedule(), after we entered RCU idle mode but before we actually
resumed userspace, we need to re-enable RCU: both in case schedule()
makes use of RCU read-side critical sections and for the next task
to be scheduled.

NOTE: If we are preempted while running adaptive tickless, we will
receive an IPI that will exit RCU idle mode for us. So this patch
is useful only when such an IPI arrives too late.

Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
Cc: Alessio Igor Bogani <abogani@...nel.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Avi Kivity <avi@...hat.com>
Cc: Chris Metcalf <cmetcalf@...era.com>
Cc: Christoph Lameter <cl@...ux.com>
Cc: Daniel Lezcano <daniel.lezcano@...aro.org>
Cc: Geoff Levand <geoff@...radead.org>
Cc: Gilad Ben Yossef <gilad@...yossef.com>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Max Krasnyansky <maxk@...lcomm.com>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Stephen Hemminger <shemminger@...tta.com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@...il.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Zen Lin <zen@...nhuawei.org>
---
 arch/x86/kernel/entry_64.S |    8 ++++----
 include/linux/tick.h       |    3 ++-
 kernel/sched/core.c        |   14 ++++++++++++++
 kernel/time/tick-sched.c   |    9 ++++++---
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 54f269c..c86d963 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -522,7 +522,7 @@ sysret_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	call schedule_user
 	popq_cfi %rdi
 	jmp sysret_check
 
@@ -630,7 +630,7 @@ int_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	call schedule_user
 	popq_cfi %rdi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -898,7 +898,7 @@ retint_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call  schedule
+	call  schedule_user
 	popq_cfi %rdi
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1398,7 +1398,7 @@ paranoid_userspace:
 paranoid_schedule:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
+	call schedule_user
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	jmp paranoid_userspace
diff --git a/include/linux/tick.h b/include/linux/tick.h
index e2a49ad..93add37 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -162,7 +162,7 @@ extern void tick_nohz_exit_exception(struct pt_regs *regs);
 extern void tick_nohz_check_adaptive(void);
 extern void tick_nohz_pre_schedule(void);
 extern void tick_nohz_post_schedule(void);
-extern void tick_nohz_cpu_exit_qs(void);
+extern void tick_nohz_cpu_exit_qs(bool irq);
 extern bool tick_nohz_account_tick(void);
 extern void tick_nohz_flush_current_times(bool restart_tick);
 #else /* !CPUSETS_NO_HZ */
@@ -173,6 +173,7 @@ static inline void tick_nohz_exit_exception(struct pt_regs *regs) { }
 static inline void tick_nohz_check_adaptive(void) { }
 static inline void tick_nohz_pre_schedule(void) { }
 static inline void tick_nohz_post_schedule(void) { }
+static inline void tick_nohz_cpu_exit_qs(bool irq) { }
 static inline bool tick_nohz_account_tick(void) { return false; }
 #endif /* CPUSETS_NO_HZ */
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5debfd7..cd4cb58 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3358,6 +3358,20 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 }
 #endif
 
+asmlinkage void __sched schedule_user(void)
+{
+	/*
+	 * We may arrive here before resuming userspace.
+	 * If we are running tickless, RCU may be in idle
+	 * mode. We need to reenable RCU for the next task
+	 * and also in case schedule() makes use of RCU itself.
+	 */
+	preempt_disable();
+	tick_nohz_cpu_exit_qs(false);
+	preempt_enable_no_resched();
+	schedule();
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6c66977..8b6a21b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -962,10 +962,13 @@ void tick_nohz_enter_kernel(void)
 	local_irq_restore(flags);
 }
 
-void tick_nohz_cpu_exit_qs(void)
+void tick_nohz_cpu_exit_qs(bool irq)
 {
 	if (__get_cpu_var(nohz_task_ext_qs)) {
-		rcu_user_exit_irq();
+		if (irq)
+			rcu_user_exit_irq();
+		else
+			rcu_user_exit();
 		__get_cpu_var(nohz_task_ext_qs) = 0;
 	}
 }
@@ -1005,7 +1008,7 @@ static void tick_nohz_restart_adaptive(void)
 	tick_nohz_flush_current_times(true);
 	tick_nohz_restart_sched_tick();
 	clear_thread_flag(TIF_NOHZ);
-	tick_nohz_cpu_exit_qs();
+	tick_nohz_cpu_exit_qs(true);
 }
 
 void tick_nohz_check_adaptive(void)
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ