[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <040374ca9800988a0ed35ea9ddeb4a762c1371fa.1437690860.git.luto@kernel.org>
Date: Thu, 23 Jul 2015 15:37:46 -0700
From: Andy Lutomirski <luto@...nel.org>
To: X86 ML <x86@...nel.org>, linux-kernel@...r.kernel.org
Cc: Brian Gerst <brgerst@...il.com>,
Steven Rostedt <rostedt@...dmis.org>, Willy Tarreau <w@....eu>,
Borislav Petkov <bp@...en8.de>,
Thomas Gleixner <tglx@...utronix.de>,
Peter Zijlstra <peterz@...radead.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Andy Lutomirski <luto@...nel.org>
Subject: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make then NMI-safe
This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.
The Xen code here has a confusing comment.
Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
arch/x86/entry/entry_64.S | 72 ++++++++++++++++++++++++++------------------
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/process_64.c | 4 +++
3 files changed, 47 insertions(+), 31 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d3033183ed70..5f7df8949fa7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -491,6 +491,39 @@ ENTRY(irq_entries_start)
END(irq_entries_start)
/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != 0, then we're either on the
+ * IRQ stack or an IST stack, even if an NMI interrupts IRQ stack entry
+ * or exit.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+ movq %rsp, \old_rsp
+ cmpl $0, PER_CPU_VAR(irq_count)
+ jne 694f
+ movq PER_CPU_VAR(irq_stack_ptr), %rsp
+ /*
+ * Right now, we're on the irq stack with irq_count == 0. A nested
+ * IRQ stack switch could clobber the stack. That's fine: the stack
+ * is empty.
+ */
+694:
+ incl PER_CPU_VAR(irq_count)
+ pushq \old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK
+ */
+.macro LEAVE_IRQ_STACK
+ /* We need to be off the IRQ stack before decrementing irq_count. */
+ popq %rsp
+ decl PER_CPU_VAR(irq_count)
+.endm
+
+/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers in fast path.
@@ -518,17 +551,7 @@ END(irq_entries_start)
#endif
1:
- /*
- * Save previous stack pointer, optionally switch to interrupt stack.
- * irq_count is used to check if a CPU is already on an interrupt stack
- * or not. While this is essentially redundant with preempt_count it is
- * a little cheaper to use a separate counter in the PDA (short of
- * moving irq_enter into assembly, which would be too much work)
- */
- movq %rsp, %rdi
- incl PER_CPU_VAR(irq_count)
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rdi
+ ENTER_IRQ_STACK old_rsp=%rdi
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
@@ -548,10 +571,8 @@ common_interrupt:
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- decl PER_CPU_VAR(irq_count)
- /* Restore saved previous stack */
- popq %rsp
+ LEAVE_IRQ_STACK
testb $3, CS(%rsp)
jz retint_kernel
@@ -863,14 +884,9 @@ bad_gs:
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(do_softirq_own_stack)
- pushq %rbp
- mov %rsp, %rbp
- incl PER_CPU_VAR(irq_count)
- cmove PER_CPU_VAR(irq_stack_ptr), %rsp
- push %rbp /* frame pointer backlink */
+ ENTER_IRQ_STACK old_rsp=%r11
call __do_softirq
- leaveq
- decl PER_CPU_VAR(irq_count)
+ LEAVE_IRQ_STACK
ret
END(do_softirq_own_stack)
@@ -889,25 +905,21 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
* So, on entry to the handler we detect whether we interrupted an
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
+ *
+ * XXX: I have no idea what this comment is talking about. --luto
*/
ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
-
+ ENTER_IRQ_STACK old_rsp=%r11
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
- movq %rdi, %rsp /* we don't return, adjust the stack frame */
-11: incl PER_CPU_VAR(irq_count)
- movq %rsp, %rbp
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rbp /* frame pointer backlink */
call xen_evtchn_do_upcall
- popq %rsp
- decl PER_CPU_VAR(irq_count)
+ LEAVE_IRQ_STACK
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
#endif
- jmp error_exit
+ ret
END(xen_do_hypervisor_callback)
/*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1c528b06f802..e9968531ce56 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1161,7 +1161,7 @@ EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(unsigned int, irq_count) __visible;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0831ba3bcf95..65783f6eb22c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
unsigned fsindex, gsindex;
fpu_switch_t fpu_switch;
+#ifdef CONFIG_DEBUG_ENTRY
+ WARN_ON(this_cpu_read(irq_count));
+#endif
+
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
/* We must save %fs and %gs before load_TLS() because
--
2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists