Date:	Wed, 17 Aug 2016 11:19:54 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Steven Rostedt <rostedt@...dmis.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...nel.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc:	linux-kernel@...r.kernel.org
Subject: [RFC] ftrace / perf 'recursion'


blergh, now with LKML added...

---

Much like d525211f9d1b ("perf: Fix irq_work 'tail' recursion"), I found
another infinite recursion problem with irq_work:

 <IRQ>  [<ffffffff811bb985>] ? perf_output_begin_forward+0x5/0x1e0
 [<ffffffff81067835>] ? arch_irq_work_raise+0x5/0x40
 [<ffffffff811ba170>] ? perf_event_output_forward+0x30/0x60
 [<ffffffff81067835>] arch_irq_work_raise+0x5/0x40
 [<ffffffff811ab547>] irq_work_queue+0x97/0xa0
 [<ffffffff81067835>] ? arch_irq_work_raise+0x5/0x40
 [<ffffffff811ab547>] ? irq_work_queue+0x97/0xa0
 [<ffffffff811af88f>] __perf_event_overflow+0xcf/0x1b0
 [<ffffffff811afa0a>] perf_swevent_overflow+0x9a/0xc0
 [<ffffffff811afa8d>] perf_swevent_event+0x5d/0x80
 [<ffffffff811b0472>] perf_tp_event+0x1a2/0x1b0
 [<ffffffff81a559b0>] ? _raw_spin_trylock+0x30/0x30
 [<ffffffff8119dc73>] ? perf_ftrace_function_call+0x83/0xd0
 [<ffffffff8117db25>] ? ftrace_ops_assist_func+0xb5/0x110
 [<ffffffff8117db25>] ? ftrace_ops_assist_func+0xb5/0x110
 [<ffffffff810df52d>] ? do_send_sig_info+0x5d/0x80
 [<ffffffff81a559b0>] ? _raw_spin_trylock+0x30/0x30
 [<ffffffff8119db3f>] ? perf_trace_buf_alloc+0x1f/0xa0
 [<ffffffff8124740b>] ? kill_fasync+0x6b/0x90
 [<ffffffff81a559b0>] ? _raw_spin_trylock+0x30/0x30
 [<ffffffff8119dc73>] ? perf_ftrace_function_call+0x83/0xd0
 [<ffffffff81067753>] ? smp_irq_work_interrupt+0x33/0x40
 [<ffffffff810d6f20>] ? irq_enter+0x70/0x70
 [<ffffffff8119dcaf>] perf_ftrace_function_call+0xbf/0xd0
 [<ffffffff8117db25>] ? ftrace_ops_assist_func+0xb5/0x110
 [<ffffffff8117db25>] ftrace_ops_assist_func+0xb5/0x110
 [<ffffffff81067753>] ? smp_irq_work_interrupt+0x33/0x40
 [<ffffffff810d6f20>] ? irq_enter+0x70/0x70
 [<ffffffffa157e077>] 0xffffffffa157e077
 [<ffffffff8124740b>] ? kill_fasync+0x6b/0x90
 [<ffffffff810d6f25>] ? irq_exit+0x5/0xb0
 [<ffffffff810d6f25>] irq_exit+0x5/0xb0
 [<ffffffff81067753>] smp_irq_work_interrupt+0x33/0x40
 [<ffffffff810d6f25>] ? irq_exit+0x5/0xb0
 [<ffffffff81067753>] ? smp_irq_work_interrupt+0x33/0x40
 [<ffffffff81a580b9>] irq_work_interrupt+0x89/0x90
 <EOI>

Here every irq_work execution queues another irq_work, which raises
another interrupt and gets us stuck in an IRQ loop ad infinitum.

This splat goes through function tracing of irq_exit(), but the same
loop can be built through function tracing of pretty much anything else
in that path, or through the explicit IRQ_WORK_VECTOR tracepoints.
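
To spell out the cycle, here is the call chain lifted from the trace
above (each indent is one call deeper):

 irq_work_interrupt
   irq_exit()                            <- fentry hook fires on entry
     ftrace_ops_assist_func()
       perf_ftrace_function_call()
         perf_tp_event()
           perf_swevent_event()
             perf_swevent_overflow()
               __perf_event_overflow()
                 irq_work_queue()
                   arch_irq_work_raise() <- raises the next IRQ_WORK_VECTOR

The handler of that next interrupt runs irq_exit() again, its entry is
traced again, and round we go.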

The only 'solution' is something like the patch below, which I
absolutely detest because it makes the irq_work code slower for
everyone.

Also, this doesn't fix the problem for any other arch :/

I would much rather mark the whole irq_work machinery notrace and
remove the tracepoints, but I'm sure that won't be a popular solution
either :/
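
For reference, the perf recursion guard the patch leans on has roughly
the following semantics; this is a simplified standalone sketch, not
the kernel implementation (current_context_level() is a hypothetical
stand-in for the real preempt_count() based checks, and the flags are
per-CPU in reality):

/* Hypothetical stand-in: the kernel derives this from preempt_count(). */
static int current_context_level(void)
{
	return 0;	/* 0=task, 1=softirq, 2=hardirq, 3=NMI */
}

/* One recursion flag per context level (per CPU in the real thing). */
static int recursion[4];

static int get_recursion_context(void)
{
	int rctx = current_context_level();

	if (recursion[rctx])
		return -1;	/* already inside; the caller must bail */

	recursion[rctx] = 1;
	return rctx;
}

/* Only called with an rctx returned by a successful get. */
static void put_recursion_context(int rctx)
{
	recursion[rctx] = 0;
}

With the interrupt handler holding a slot for its whole duration, an
event that fires inside it in the same context, e.g. through
perf_trace_buf_alloc(), gets -1 back and is dropped, which is what
breaks the loop.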



---
 arch/x86/kernel/irq_work.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 3512ba607361..a8a7999f1147 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -10,26 +10,41 @@
 #include <asm/apic.h>
 #include <asm/trace/irq_vectors.h>
 
+/*
+ * I'm sure header recursion will bite my head off
+ */
+#ifdef CONFIG_PERF_EVENTS
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
+#else
+static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)		{ }
+#endif
+
 static inline void __smp_irq_work_interrupt(void)
 {
 	inc_irq_stat(apic_irq_work_irqs);
 	irq_work_run();
 }
 
-__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible notrace void smp_irq_work_interrupt(struct pt_regs *regs)
 {
+	int rctx = perf_swevent_get_recursion_context();
 	ipi_entering_ack_irq();
 	__smp_irq_work_interrupt();
 	exiting_irq();
+	perf_swevent_put_recursion_context(rctx);
 }
 
-__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible notrace void smp_trace_irq_work_interrupt(struct pt_regs *regs)
 {
+	int rctx = perf_swevent_get_recursion_context();
 	ipi_entering_ack_irq();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
 	__smp_irq_work_interrupt();
 	trace_irq_work_exit(IRQ_WORK_VECTOR);
 	exiting_irq();
+	perf_swevent_put_recursion_context(rctx);
 }
 
 void arch_irq_work_raise(void)
