Date:   Tue,  2 Apr 2019 22:03:53 +0200
From:   Daniel Bristot de Oliveira <bristot@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     Steven Rostedt <rostedt@...dmis.org>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Ingo Molnar <mingo@...hat.com>,
        Andy Lutomirski <luto@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>,
        Peter Zijlstra <peterz@...radead.org>,
        "H. Peter Anvin" <hpa@...or.com>,
        "Joel Fernandes (Google)" <joel@...lfernandes.org>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>,
        Romulo Silva de Oliveira <romulo.deoliveira@...c.br>,
        Clark Williams <williams@...hat.com>, x86@...nel.org
Subject: [RFC PATCH 1/7] x86/entry: Add support for early task context tracking

Currently, the execution context is identified through the preempt
counter, but the counter is only set after the first functions of an
IRQ/NMI handler have already run, so the current context can be
misidentified during that early window. For instance, ftrace/perf might
drop events in the early stage of IRQ/NMI handlers because the preempt
counter has not been set yet.
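
For reference, the existing context checks are derived from bits in the
preempt counter, roughly as in include/linux/preempt.h (simplified
excerpt, shown only to illustrate where the information comes from):

	#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
	#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
	#define in_irq()	(hardirq_count())
	#define in_softirq()	(softirq_count())
	#define in_nmi()	(preempt_count() & NMI_MASK)

None of these can report the right context before the handler's entry
code has bumped the preempt counter, which is exactly the window this
patch addresses.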

The proposed approach uses a dedicated per-cpu variable to keep track
of the execution context, with its value set before the first C
function of the interrupt handler runs.
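
As an illustration of how a consumer such as ftrace/perf could query
the new variable, a minimal sketch follows (the task_context_get()
helper is hypothetical and not part of this patch):

	/* Hypothetical helper: read the early-set context on this CPU. */
	static __always_inline unsigned char task_context_get(void)
	{
		return raw_cpu_read_1(task_context);
	}

	/* A tracer could then test, e.g.: */
	if (task_context_get() & TASK_CTX_NMI)
		; /* in NMI, even before preempt_count is updated */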

This is a PoC for x86_64.

Signed-off-by: Daniel Bristot de Oliveira <bristot@...hat.com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Andy Lutomirski <luto@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: "Joel Fernandes (Google)" <joel@...lfernandes.org>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>
Cc: Romulo Silva de Oliveira <romulo.deoliveira@...c.br>
Cc: Clark Williams <williams@...hat.com>
Cc: linux-kernel@...r.kernel.org
Cc: x86@...nel.org
---
 arch/x86/entry/entry_64.S       |  9 +++++++++
 arch/x86/include/asm/irqflags.h | 30 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c    |  4 ++++
 include/linux/irqflags.h        |  4 ++++
 kernel/softirq.c                |  5 ++++-
 5 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..1471b544241f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -545,6 +545,7 @@ ENTRY(interrupt_entry)
 	testb	$3, CS+8(%rsp)
 	jz	1f
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
 	/*
 	 * IRQ from user mode.
 	 *
@@ -561,6 +562,8 @@ ENTRY(interrupt_entry)
 
 1:
 	ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
+
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_IRQ
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -586,6 +589,7 @@ ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
 	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
@@ -780,6 +784,7 @@ ENTRY(\sym)
 	call	interrupt_entry
 	UNWIND_HINT_REGS indirect=1
 	call	\do_sym	/* rdi points to pt_regs */
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_IRQ
 	jmp	ret_from_intr
 END(\sym)
 _ASM_NOKPROBE(\sym)
@@ -1403,9 +1408,11 @@ ENTRY(nmi)
 	 * done with the NMI stack.
 	 */
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
 
 	/*
 	 * Return back to user mode.  We must *not* do the normal exit
@@ -1615,10 +1622,12 @@ end_repeat_nmi:
 	call	paranoid_entry
 	UNWIND_HINT_REGS
 
+	TASK_CONTEXT_SET_BIT context=TASK_CTX_NMI
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq	%rsp, %rdi
 	movq	$-1, %rsi
 	call	do_nmi
+	TASK_CONTEXT_RESET_BIT context=TASK_CTX_NMI
 
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 058e40fed167..5a12bc3ea02b 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -3,6 +3,7 @@
 #define _X86_IRQFLAGS_H_
 
 #include <asm/processor-flags.h>
+#include <asm/percpu.h>
 
 #ifndef __ASSEMBLY__
 
@@ -202,4 +203,33 @@ static inline int arch_irqs_disabled(void)
 #endif
 #endif /* __ASSEMBLY__ */
 
+#ifdef CONFIG_X86_64
+/*
+ * NOTE: I know I need to implement this for 32-bit as well.
+ * But... this is just a PoC.
+ */
+#define ARCH_HAS_TASK_CONTEXT   1
+
+#define TASK_CTX_THREAD		0x0
+#define TASK_CTX_SOFTIRQ	0x1
+#define TASK_CTX_IRQ		0x2
+#define TASK_CTX_NMI		0x4
+
+#ifdef __ASSEMBLY__
+.macro TASK_CONTEXT_SET_BIT context:req
+	orb	$\context, PER_CPU_VAR(task_context)
+.endm
+
+.macro TASK_CONTEXT_RESET_BIT context:req
+	andb	$~\context, PER_CPU_VAR(task_context)
+.endm
+#else /* __ASSEMBLY__ */
+DECLARE_PER_CPU(unsigned char, task_context);
+
+static __always_inline void task_context_set(unsigned char context)
+{
+	raw_cpu_write_1(task_context, context);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_X86_64 */
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..1acbec22319b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1531,6 +1531,8 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
@@ -1604,6 +1606,8 @@ EXPORT_PER_CPU_SYMBOL(current_task);
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
+DEFINE_PER_CPU(unsigned char, task_context) __visible = 0;
+
 /*
  * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
  * the top of the kernel stack.  Use an extra percpu variable to track the
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 21619c92c377..1c3473bbe5d2 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -168,4 +168,8 @@ do {						\
 
 #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
 
+#ifndef ARCH_HAS_TASK_CONTEXT
+#define task_context_set(context) do {} while (0)
+#endif
+
 #endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 10277429ed84..324de769dc07 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -410,8 +410,11 @@ void irq_exit(void)
 #endif
 	account_irq_exit_time(current);
 	preempt_count_sub(HARDIRQ_OFFSET);
-	if (!in_interrupt() && local_softirq_pending())
+	if (!in_interrupt() && local_softirq_pending()) {
+		task_context_set(TASK_CTX_SOFTIRQ);
 		invoke_softirq();
+		task_context_set(TASK_CTX_IRQ);
+	}
 
 	tick_irq_exit();
 	rcu_irq_exit();
-- 
2.20.1
