[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190502183058.GD2650@hirez.programming.kicks-ass.net>
Date: Thu, 2 May 2019 20:30:58 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Steven Rostedt <rostedt@...dmis.org>,
Linux List Kernel Mailing <linux-kernel@...r.kernel.org>,
Ingo Molnar <mingo@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Andy Lutomirski <luto@...nel.org>,
Nicolai Stange <nstange@...e.de>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
"H. Peter Anvin" <hpa@...or.com>,
the arch/x86 maintainers <x86@...nel.org>,
Josh Poimboeuf <jpoimboe@...hat.com>,
Jiri Kosina <jikos@...nel.org>,
Miroslav Benes <mbenes@...e.cz>,
Petr Mladek <pmladek@...e.com>,
Joe Lawrence <joe.lawrence@...hat.com>,
Shuah Khan <shuah@...nel.org>,
Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Mimi Zohar <zohar@...ux.ibm.com>,
Juergen Gross <jgross@...e.com>,
Nick Desaulniers <ndesaulniers@...gle.com>,
Nayna Jain <nayna@...ux.ibm.com>,
Masahiro Yamada <yamada.masahiro@...ionext.com>,
Joerg Roedel <jroedel@...e.de>,
"open list:KERNEL SELFTEST FRAMEWORK"
<linux-kselftest@...r.kernel.org>, stable <stable@...r.kernel.org>
Subject: Re: [RFC][PATCH 1/2] x86: Allow breakpoints to emulate call functions
On Thu, May 02, 2019 at 08:18:11PM +0200, Peter Zijlstra wrote:
> ARGH; I knew it was too pretty :/ Yes, something like what you suggest
> will be needed, I'll go look at that once my brain recovers a bit from
> staring at entry code all day.
I forgot I can just run the thing, and it works!
---
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..73b7bca8712f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -203,7 +203,7 @@
.Lend_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 clear_csh=1
cld
PUSH_GS
pushl %fs
@@ -225,7 +225,7 @@
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
- SWITCH_TO_KERNEL_STACK
+ SWITCH_TO_KERNEL_STACK \clear_csh
.endif
.endm
@@ -377,8 +377,9 @@
#define CS_FROM_ENTRY_STACK (1 << 31)
#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_INT3 (1 << 29)
-.macro SWITCH_TO_KERNEL_STACK
+.macro SWITCH_TO_KERNEL_STACK clear_csh=1
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,12 +392,13 @@
* that register for the time this macro runs
*/
+ .if \clear_csh
/*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*. Clear
+ * them in case hardware didn't do this for us.
*/
andl $(0x0000ffff), PT_CS(%esp)
+ .endif
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
@@ -1019,6 +1021,40 @@ ENTRY(entry_INT80_32)
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
+ testl $CS_FROM_INT3, 4(%esp)
+ jz .Lno_iret_fixup
+
+ /*
+ * Undo the magic from ENTRY(int3), in particular consider the case
+ * where regs->sp has been modified.
+ *
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+
+.Lno_iret_fixup:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1477,9 +1513,57 @@ END(nmi)
ENTRY(int3)
ASM_CLAC
+
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*. Clear
+ * them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 8(%esp)
+ jnz .Lfrom_usermode_no_gap
+#endif
+ testl $SEGMENT_RPL_MASK, 4(%esp)
+ jnz .Lfrom_usermode_no_gap
+
+ /*
+ * Here from kernel mode; so the (exception) stack looks like:
+ *
+ * 12(esp) - <previous context>
+ * 8(esp) - flags
+ * 4(esp) - cs
+ * 0(esp) - ip
+ *
+ * Lets build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 3 enties as gap:
+ *
+ * 32(esp) - <previous context>
+ * 28(esp) - orig_flags / gap
+ * 24(esp) - orig_cs / gap
+ * 20(esp) - orig_ip / gap
+ * 16(esp) - ss
+ * 12(esp) - sp
+ * 8(esp) - flags
+ * 4(esp) - cs
+ * 0(esp) - ip
+ */
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ pushl 4*4(%esp) # flags
+ pushl 4*4(%esp) # cs
+ pushl 4*4(%esp) # ip
+
+ add $16, 12(%esp) # point sp back at the previous context
+
+ orl $CS_FROM_INT3, 4(%esp) # mark magic IRET
+
+.Lfrom_usermode_no_gap:
+
pushl $-1 # mark this as an int
- SAVE_ALL switch_stacks=1
+ SAVE_ALL switch_stacks=1 clear_csh=0
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
xorl %edx, %edx # zero error code
Powered by blists - more mailing lists