[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120929134957.GA30257@somewhere>
Date: Sat, 29 Sep 2012 15:50:07 +0200
From: Frederic Weisbecker <fweisbec@...il.com>
To: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc: Sasha Levin <levinsasha928@...il.com>,
Dave Jones <davej@...hat.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: rcu: eqs related warnings in linux-next
On Sat, Sep 29, 2012 at 06:37:37AM -0700, Paul E. McKenney wrote:
> On Sat, Sep 29, 2012 at 02:25:04PM +0200, Frederic Weisbecker wrote:
> > 2012/9/29 Sasha Levin <levinsasha928@...il.com>:
> > > Maybe I could help here a bit.
> > >
> > > lappy linux # addr2line -i -e vmlinux ffffffff8111d45f
> > > /usr/src/linux/kernel/timer.c:549
> > > /usr/src/linux/include/linux/jump_label.h:101
> > > /usr/src/linux/include/trace/events/timer.h:44
> > > /usr/src/linux/kernel/timer.c:601
> > > /usr/src/linux/kernel/timer.c:734
> > > /usr/src/linux/kernel/timer.c:886
> > >
> > > Which means that it was about to:
> > >
> > > debug_object_activate(timer, &timer_debug_descr);
>
> Understood and agreed, hence my severe diagnostic patch.
>
> > I can't find anything in the debug object code that might fault.
> > I was suspecting some per-CPU allocated memory: per-CPU allocation
> > sometimes uses vmalloc,
> > which relies on lazy paging via faults. But I can't find any such thing there.
> >
> > Maybe there is some faulting specific to KVM...
>
> Sasha, is this easily reproducible? If so, could you please try the
> previous patch? It will likely give us more information on where
> this bug really lives. (Yes, it might totally obscure the bug, but
> in that case we will just need to try some other perturbation.)
Isn't your patch actually removing the timer? If so, we won't fault
anymore. Or maybe you want to check whether we also fault outside the timer?
Just in case, I'm posting a second patch that dumps the regs when we
fault in the middle of an RCU user mode API. This way we can find
the precise rip where we fault:
---
From db4ef9708e606754ac8a3f83b9f293383d263108 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@...il.com>
Date: Sat, 29 Sep 2012 14:16:09 +0200
Subject: [PATCH] rcu: Debug nasty rcu user mode API recursion
Add some debug code to chase down the origin of the fault.
Not-Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
---
arch/x86/mm/fault.c | 1 +
include/linux/rcupdate.h | 1 +
kernel/rcutree.c | 32 ++++++++++++++++++++++++++++++++
kernel/rcutree.h | 1 +
4 files changed, 35 insertions(+)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b23..a5f0eb5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1232,6 +1232,7 @@ good_area:
dotraplinkage void __kprobes
do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
+ rcu_check_user_recursion(regs);
exception_enter(regs);
__do_page_fault(regs, error_code);
exception_exit(regs);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4..14ba908 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -199,6 +199,7 @@ extern void rcu_user_enter_after_irq(void);
extern void rcu_user_exit_after_irq(void);
extern void rcu_user_hooks_switch(struct task_struct *prev,
struct task_struct *next);
+extern void rcu_check_user_recursion(struct pt_regs *regs);
#else
static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4fb2376..63b84f5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -405,6 +405,20 @@ void rcu_idle_enter(void)
EXPORT_SYMBOL_GPL(rcu_idle_enter);
#ifdef CONFIG_RCU_USER_QS
+void rcu_check_user_recursion(struct pt_regs *regs)
+{
+ unsigned long flags;
+ static int printed;
+
+ local_irq_save(flags);
+ if (__this_cpu_read(rcu_dynticks.recursion) && !printed) {
+ printed = 1;
+ printk("Found recursion\n");
+ show_regs(regs);
+ }
+ local_irq_restore(flags);
+}
+
/**
* rcu_user_enter - inform RCU that we are resuming userspace.
*
@@ -433,10 +447,20 @@ void rcu_user_enter(void)
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
+ if (WARN_ON_ONCE(rdtp->recursion)) {
+ local_irq_restore(flags);
+ return;
+ }
+
+ rdtp->recursion = true;
+ barrier();
+
if (!rdtp->ignore_user_qs && !rdtp->in_user) {
rdtp->in_user = true;
rcu_eqs_enter(true);
}
+ rdtp->recursion = false;
+
local_irq_restore(flags);
}
@@ -590,10 +614,18 @@ void rcu_user_exit(void)
local_irq_save(flags);
rdtp = &__get_cpu_var(rcu_dynticks);
+ if (WARN_ON_ONCE(rdtp->recursion)) {
+ local_irq_restore(flags);
+ return;
+ }
+
+ rdtp->recursion = true;
+ barrier();
if (rdtp->in_user) {
rdtp->in_user = false;
rcu_eqs_exit(true);
}
+ rdtp->recursion = false;
local_irq_restore(flags);
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 5faf05d..1bde9d5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -103,6 +103,7 @@ struct rcu_dynticks {
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
#ifdef CONFIG_RCU_USER_QS
+ bool recursion;
bool ignore_user_qs; /* Treat userspace as extended QS or not */
bool in_user; /* Is the CPU in userland from RCU POV? */
#endif
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists