lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 21 Jul 2016 16:21:54 -0500
From:	Josh Poimboeuf <jpoimboe@...hat.com>
To:	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...nel.org>,
	"H . Peter Anvin" <hpa@...or.com>
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org,
	Andy Lutomirski <luto@...capital.net>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Brian Gerst <brgerst@...il.com>,
	Kees Cook <keescook@...omium.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Byungchul Park <byungchul.park@....com>
Subject: [PATCH 17/19] x86/entry/dumpstack: encode pt_regs pointer in frame pointer

With frame pointers, when a task is interrupted, its stack trace is no
longer completely reliable because the function could have been
interrupted before it had a chance to save the previous frame pointer on
the stack.  So the caller of the interrupted function could get skipped
by a stack trace.

This is problematic for live patching, which needs to know whether a
stack trace of a sleeping task can be relied upon.  There's currently no
way to detect if a sleeping task was interrupted by a page fault
exception or preemption before it went to sleep.

Another issue is that when dumping the stack of an interrupted task, the
unwinder has no way of knowing where the saved pt_regs registers are, so
it can't print them.

Solve both issues by encoding the pt_regs pointer in the frame pointer
on entry from an interrupt or an exception.  Also update the frame
pointer unwinder to decode it.

Suggested-by: Andy Lutomirski <luto@...capital.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@...hat.com>
---
 arch/x86/entry/calling.h       | 21 ++++++++++++++++++++
 arch/x86/entry/entry_64.S      |  7 ++++++-
 arch/x86/include/asm/unwind.h  | 11 +++++++++++
 arch/x86/kernel/unwind_frame.c | 44 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9a9e588..ff5a5a3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,6 +201,27 @@ For 32-bit we have the following conventions - kernel is built with
 	.byte 0xf1
 	.endm
 
+	/*
+	 * This is a sneaky trick to help the unwinder find pt_regs on the
+	 * stack.  The frame pointer is replaced with an encoded pointer to
+	 * pt_regs.  The encoding is just a clearing of the highest-order bit,
+	 * which makes it an invalid address and is also a signal to the
+	 * unwinder that it's a pt_regs pointer in disguise.
+	 *
+	 * NOTE: This must be called *after* SAVE_EXTRA_REGS because it
+	 * corrupts rbp.
+	 */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+	.if \ptregs_offset
+		leaq \ptregs_offset(%rsp), %rbp
+	.else
+		mov %rsp, %rbp
+	.endif
+	btr $63, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875..7f492e2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -431,6 +431,7 @@ END(irq_entries_start)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 
 	testb	$3, CS(%rsp)
 	jz	1f
@@ -893,6 +894,7 @@ ENTRY(xen_failsafe_callback)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 	jmp	error_exit
 END(xen_failsafe_callback)
 
@@ -936,6 +938,7 @@ ENTRY(paranoid_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -983,6 +986,7 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1165,6 +1169,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	ENCODE_FRAME_POINTER
 
 	/*
 	 * At this point we no longer need to worry about stack damage
@@ -1182,7 +1187,7 @@ ENTRY(nmi)
 	 * do_nmi doesn't modify pt_regs.
 	 */
 	SWAPGS
-	jmp	restore_c_regs_and_iret
+	jmp	restore_regs_and_iret
 
 .Lnmi_from_kernel:
 	/*
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 61c6e95..6d461ee 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -14,6 +14,7 @@ struct unwind_state {
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
 	unsigned long *bp;
+	struct pt_regs *regs;
 #endif
 };
 
@@ -35,6 +36,11 @@ static inline unsigned long *unwind_get_return_address_ptr(struct unwind_state *
 
 unsigned long unwind_get_return_address(struct unwind_state *state);
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return state->regs;
+}
+
 #else /* !CONFIG_FRAME_POINTER */
 
 static inline unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
@@ -50,6 +56,11 @@ static inline unsigned long unwind_get_return_address(struct unwind_state *state
 	return *state->sp;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 static inline unsigned long *unwind_get_stack_ptr(struct unwind_state *state)
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 1234480..2536353 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -18,6 +18,31 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(struct unwind_state *state,
+					    unsigned long *bp)
+{
+	struct pt_regs *regs = (struct pt_regs *)bp;
+	unsigned long *task_begin = task_stack_page(state->task);
+	unsigned long *task_end   = task_stack_page(state->task) + THREAD_SIZE;
+
+	if (test_and_set_bit(BITS_PER_LONG - 1, (unsigned long *)&regs))
+		return NULL;
+
+	if (on_stack(&state->stack_info, regs, sizeof(*regs)))
+		return regs;
+
+	if ((unsigned long *)regs >= task_begin &&
+	    (unsigned long *)regs < task_end &&
+	    (unsigned long *)(regs + 1) <= task_end)
+		return regs;
+
+	return NULL;
+}
+
 static unsigned long *update_stack_state(struct unwind_state *state, void *addr,
 					 size_t len)
 {
@@ -58,14 +83,32 @@ static bool unwind_next_frame_bp(struct unwind_state *state, unsigned long *bp)
 	return true;
 }
 
+static bool unwind_next_frame_regs(struct unwind_state *state,
+				   struct pt_regs *regs)
+{
+	update_stack_state(state, regs, sizeof(*regs));
+	if (state->stack_info.type == STACK_TYPE_UNKNOWN)
+		return false;
+
+	state->regs = regs;
+
+	return unwind_next_frame_bp(state, (unsigned long *)regs->bp);
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
+	struct pt_regs *regs;
 	unsigned long *bp;
 
+	state->regs = NULL;
+
 	if (unwind_done(state))
 		return false;
 
 	bp = (unsigned long *)*state->bp;
+	regs = decode_frame_pointer(state, bp);
+	if (regs)
+		return unwind_next_frame_regs(state, regs);
 
 	return unwind_next_frame_bp(state, bp);
 }
@@ -79,6 +122,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	state->task = task;
 	state->sp = sp;
 	state->bp = get_frame_pointer(task, regs);
+	state->regs = NULL;
 
 	get_stack_info(sp, state->task, &state->stack_info, &state->stack_mask);
 
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ