Message-Id: <f2ca7336162b6dc45f413cfe4e0056e6aa32e7ed.1559051152.git.bristot@redhat.com>
Date: Tue, 28 May 2019 17:16:23 +0200
From: Daniel Bristot de Oliveira <bristot@...hat.com>
To: linux-kernel@...r.kernel.org
Cc: williams@...hat.com, daniel@...stot.me,
"Steven Rostedt (VMware)" <rostedt@...dmis.org>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Matthias Kaehlcke <mka@...omium.org>,
"Joel Fernandes (Google)" <joel@...lfernandes.org>,
Frederic Weisbecker <frederic@...nel.org>,
Yangtao Li <tiny.windzz@...il.com>,
Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>
Subject: [RFC 2/3] preempt_tracer: Disable IRQ while starting/stopping due to a preempt_counter change

The preempt_disable/enable tracepoints fire only on the actual disable <->
enable transition of the preempt counter, which is correct. But consider
this case:
---------------------------- %< ------------------------------
    THREAD                          IRQ
      |                              |
    preempt_disable() {
        __preempt_count_add(1)
                 ------->  smp_apic_timer_interrupt() {
                               preempt_disable()
                                   do not trace (preempt count >= 1)
                               ....
                               preempt_enable()
                                   do not trace (preempt count >= 1)
                           }
        trace_preempt_disable();
    }
---------------------------- >% ------------------------------
The preempt_disable/enable tracepoints inside the IRQ are skipped
(preempt_count is already >= 1), and the thread's trace_preempt_disable()
fires only after the IRQ has returned, so everything the IRQ does is
traced as if preemption were still enabled.
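The window is between the counter update and the tracepoint call in
preempt_count_add(). A minimal sketch of the current ordering (simplified
here; the DEBUG_PREEMPT checks are omitted):
---------------------------- %< ------------------------------
void preempt_count_add(int val)
{
	__preempt_count_add(val);	/* preempt_count is now >= 1 */
	/*
	 * An IRQ arriving at this point already sees preempt_count >= 1,
	 * so its own preempt_disable/enable pair is not traced, while
	 * the tracepoint below has not fired yet.
	 */
	preempt_latency_start(val);	/* fires the preempt_disable event */
}
---------------------------- >% ------------------------------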
It is possible to observe this problem using this kernel module:
http://bristot.me/files/efficient_verification/wip.tar.gz [*]
and the following tracing session:
# cd /sys/kernel/debug/tracing/
# echo 1 > snapshot
# cat available_events | grep preempt_ > set_event
# echo irq_vectors >> /sys/kernel/debug/tracing/set_event
# insmod wip.ko
/* wait for the snapshot to be taken */
# tail -100 snapshot
[...]
tail-5572 [001] ....1.. 2888.401184: preempt_enable: caller=_raw_spin_unlock_irqrestore+0x2a/0x70 parent= (null)
tail-5572 [001] ....1.. 2888.401184: preempt_disable: caller=migrate_disable+0x8b/0x1e0 parent=migrate_disable+0x8b/0x1e0
tail-5572 [001] ....111 2888.401184: preempt_enable: caller=migrate_disable+0x12f/0x1e0 parent=migrate_disable+0x12f/0x1e0
tail-5572 [001] d..h212 2888.401189: local_timer_entry: vector=236
tail-5572 [001] dN.h512 2888.401192: process_event: event sched_waking not expected in the state preemptive
tail-5572 [001] dN.h512 2888.401200: <stack trace>
=> process_event
=> __handle_event
=> ttwu_do_wakeup
=> try_to_wake_up
=> invoke_rcu_core
=> rcu_check_callbacks
=> update_process_times
=> tick_sched_handle
=> tick_sched_timer
=> __hrtimer_run_queues
=> hrtimer_interrupt
=> smp_apic_timer_interrupt
=> apic_timer_interrupt
=> trace_event_raw_event_preemptirq_template
=> trace_preempt_off
=> get_page_from_freelist
=> __alloc_pages_nodemask
=> __handle_mm_fault
=> handle_mm_fault
=> __do_page_fault
=> do_page_fault
=> async_page_fault
The stack trace above shows the timer interrupt arriving while
trace_preempt_off() was still emitting the preempt_disable event: the
counter had already been incremented, but the verification module had not
yet seen the event, so it complains that sched_waking happened in the
"preemptive" state.
To avoid skipping the trace, the change in the counter should be "atomic"
with the start/stop of the tracepoint, w.r.t. interrupts.
Disable interrupts while adding/starting and stopping/subtracting.
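A minimal sketch of the resulting pairing, which is what the diff below
implements (the raw_ variants of the irq save/restore helpers are used
here on the assumption that the plain ones must be avoided, as they would
themselves call into the irqs on/off tracing):
---------------------------- %< ------------------------------
static inline void preempt_add_start_latency(int val)
{
	unsigned long flags;

	raw_local_irq_save(flags);	/* close the race window: */
	__preempt_count_add(val);	/* counter change and tracepoint */
	preempt_latency_start(val);	/* are now atomic w.r.t. IRQs */
	raw_local_irq_restore(flags);
}
---------------------------- >% ------------------------------
The stop side (preempt_sub_stop_latency()) mirrors this: the tracepoint
fires before the counter is decremented, again with IRQs disabled.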
Signed-off-by: Daniel Bristot de Oliveira <bristot@...hat.com>
Cc: "Steven Rostedt (VMware)" <rostedt@...dmis.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc: Matthias Kaehlcke <mka@...omium.org>
Cc: "Joel Fernandes (Google)" <joel@...lfernandes.org>
Cc: Frederic Weisbecker <frederic@...nel.org>
Cc: Yangtao Li <tiny.windzz@...il.com>
Cc: Tommaso Cucinotta <tommaso.cucinotta@...tannapisa.it>
Cc: linux-kernel@...r.kernel.org
---
[*] With some luck, we will talk about this at the Linux Plumbers Conference.
kernel/sched/core.c | 48 +++++++++++++++++++++++++++++++++------------
kernel/softirq.c | 2 +-
2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8c0b414e45dc..be4117d7384f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3163,6 +3163,16 @@ void preempt_latency_start(int val)
 	}
 }
 
+static inline void preempt_add_start_latency(int val)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__preempt_count_add(val);
+	preempt_latency_start(val);
+	raw_local_irq_restore(flags);
+}
+
 void preempt_count_add(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -3172,7 +3182,7 @@ void preempt_count_add(int val)
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
 #endif
-	__preempt_count_add(val);
+	preempt_add_start_latency(val);
 #ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
@@ -3180,7 +3190,6 @@ void preempt_count_add(int val)
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
 #endif
-	preempt_latency_start(val);
 }
 EXPORT_SYMBOL(preempt_count_add);
 NOKPROBE_SYMBOL(preempt_count_add);
@@ -3195,6 +3204,16 @@ void preempt_latency_stop(int val)
 		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 }
 
+static inline void preempt_sub_stop_latency(int val)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	preempt_latency_stop(val);
+	__preempt_count_sub(val);
+	raw_local_irq_restore(flags);
+}
+
 void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -3211,8 +3230,7 @@ void preempt_count_sub(int val)
 		return;
 #endif
 
-	preempt_latency_stop(val);
-	__preempt_count_sub(val);
+	preempt_sub_stop_latency(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
 NOKPROBE_SYMBOL(preempt_count_sub);
@@ -3220,6 +3238,16 @@ NOKPROBE_SYMBOL(preempt_count_sub);
 #else
 static inline void preempt_latency_start(int val) { }
 static inline void preempt_latency_stop(int val) { }
+
+static inline void preempt_sub_stop_latency(int val)
+{
+	__preempt_count_sub(val);
+}
+
+static inline void preempt_add_start_latency(int val)
+{
+	__preempt_count_add(val);
+}
 #endif
 
 static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
@@ -3585,11 +3613,9 @@ static void __sched notrace preempt_schedule_common(void)
 		 * traced. The other to still record the preemption latency,
 		 * which can also be traced by the function tracer.
 		 */
-		preempt_disable_notrace();
-		preempt_latency_start(1);
+		preempt_add_start_latency(1);
 		__schedule(true);
-		preempt_latency_stop(1);
-		preempt_enable_no_resched_notrace();
+		preempt_sub_stop_latency(1);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
@@ -3653,8 +3679,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		 * traced. The other to still record the preemption latency,
 		 * which can also be traced by the function tracer.
 		 */
-		preempt_disable_notrace();
-		preempt_latency_start(1);
+		preempt_add_start_latency(1);
 		/*
 		 * Needs preempt disabled in case user_exit() is traced
 		 * and the tracer calls preempt_enable_notrace() causing
@@ -3664,8 +3689,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		__schedule(true);
 		exception_exit(prev_ctx);
 
-		preempt_latency_stop(1);
-		preempt_enable_no_resched_notrace();
+		preempt_sub_stop_latency(1);
 	} while (need_resched());
 }
 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c9ad89c3dfed..9c64522ecc76 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -130,9 +130,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	 */
 	if (softirq_count() == (cnt & SOFTIRQ_MASK))
 		trace_softirqs_off(ip);
-	raw_local_irq_restore(flags);
 
 	preempt_latency_start(cnt);
+	raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
 
--
2.20.1