lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 18 Nov 2014 15:15:19 -0800
From:	Andy Lutomirski <luto@...capital.net>
To:	Borislav Petkov <bp@...en8.de>, x86@...nel.org,
	Linus Torvalds <torvalds@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org,
	Peter Zijlstra <peterz@...radead.org>,
	Oleg Nesterov <oleg@...hat.com>,
	Tony Luck <tony.luck@...el.com>,
	Andi Kleen <andi@...stfloor.org>,
	Andy Lutomirski <luto@...capital.net>
Subject: [PATCH v3 3/3] sched, x86: Check that we're on the right stack in schedule and __might_sleep

On x86, sleeping while on an IST or irq stack has a surprisingly
good chance of working, but it can also fail dramatically.  Add an
arch hook to allow schedule and __might_sleep to catch sleeping on
the wrong stack.

This will also catch do_exit from a funny stack, which could leave
an IST stack shifted or an NMI nesting count incremented.

Signed-off-by: Andy Lutomirski <luto@...capital.net>
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/thread_info.h | 17 +++++++++++++++++
 arch/x86/kernel/irq_32.c           | 13 +++----------
 include/linux/thread_info.h        |  7 +++++++
 kernel/Kconfig.locks               |  3 +++
 kernel/sched/core.c                | 14 ++++++++++----
 6 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ded8a6774ac9..a811286636d2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -137,6 +137,7 @@ config X86
 	select HAVE_ACPI_APEI_NMI if ACPI
 	select ACPI_LEGACY_TABLES_LOOKUP if ACPI
 	select X86_FEATURE_NAMES if PROC_FS
+	select HAVE_ARCH_SCHEDULE_ALLOWED
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 547e344a6dc6..05701f132473 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -170,6 +170,23 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+static inline unsigned long current_stack_pointer(void)
+{
+	unsigned long sp;
+#ifdef CONFIG_X86_64
+	asm("mov %%rsp,%0" : "=g" (sp));
+#else
+	asm("mov %%esp,%0" : "=g" (sp));
+#endif
+	return sp;
+}
+
+static inline bool arch_schedule_allowed(void)
+{
+	return ((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
+		& ~(THREAD_SIZE - 1)) == 0;
+}
+
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 63ce838e5a54..28d28f5eb8f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack)
 		     : "memory", "cc", "edx", "ecx", "eax");
 }
 
-/* how to get the current stack pointer from C */
-#define current_stack_pointer ({		\
-	unsigned long sp;			\
-	asm("mov %%esp,%0" : "=g" (sp));	\
-	sp;					\
-})
-
 static inline void *current_stack(void)
 {
-	return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+	return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
 }
 
 static inline int
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 
 	/* Save the next esp at the bottom of the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer;
+	*prev_esp = current_stack_pointer();
 
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void)
 
 	/* Push the previous esp onto the stack */
 	prev_esp = (u32 *)irqstk;
-	*prev_esp = current_stack_pointer;
+	*prev_esp = current_stack_pointer();
 
 	call_on_stack(__do_softirq, isp);
 }
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..6deaf7e97009 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -145,6 +145,13 @@ static inline bool test_and_clear_restore_sigmask(void)
 #error "no set_restore_sigmask() provided and default one won't work"
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_SCHEDULE_ALLOWED
+static inline bool arch_schedule_allowed(void)
+{
+	return true;
+}
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 76768ee812b2..2714dc34695a 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -237,3 +237,6 @@ config ARCH_USE_QUEUE_RWLOCK
 config QUEUE_RWLOCK
 	def_bool y if ARCH_USE_QUEUE_RWLOCK
 	depends on SMP
+
+config HAVE_ARCH_SCHEDULE_ALLOWED
+	bool
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 240157c13ddc..e51ab65a9750 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2705,8 +2705,12 @@ static inline void schedule_debug(struct task_struct *prev)
 	 * Test if we are atomic. Since do_exit() needs to call into
 	 * schedule() atomically, we ignore that path. Otherwise whine
 	 * if we are scheduling when we should not.
+	 *
+	 * If architectural conditions for scheduling are not met,
+	 * complain even if we are in do_exit.
 	 */
-	if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD))
+	if (unlikely((in_atomic_preempt_off() && prev->state != TASK_DEAD) ||
+		     !arch_schedule_allowed()))
 		__schedule_bug(prev);
 	rcu_sleep_check();
 
@@ -7200,10 +7204,12 @@ static inline int preempt_count_equals(int preempt_offset)
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	static unsigned long prev_jiffy;	/* ratelimiting */
+	bool arch_ok;
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
+	arch_ok = arch_schedule_allowed();
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-	     !is_idle_task(current)) ||
+	     !is_idle_task(current) && arch_ok) ||
 	    system_state != SYSTEM_RUNNING || oops_in_progress)
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -7214,8 +7220,8 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 		"BUG: sleeping function called from invalid context at %s:%d\n",
 			file, line);
 	printk(KERN_ERR
-		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-			in_atomic(), irqs_disabled(),
+		"in_atomic(): %d, irqs_disabled(): %d, arch_schedule_allowed: %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(), (int)arch_ok,
 			current->pid, current->comm);
 
 	debug_show_held_locks(current);
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ