lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sat, 29 Sep 2012 15:50:07 +0200
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc:	Sasha Levin <levinsasha928@...il.com>,
	Dave Jones <davej@...hat.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: rcu: eqs related warnings in linux-next

On Sat, Sep 29, 2012 at 06:37:37AM -0700, Paul E. McKenney wrote:
> On Sat, Sep 29, 2012 at 02:25:04PM +0200, Frederic Weisbecker wrote:
> > 2012/9/29 Sasha Levin <levinsasha928@...il.com>:
> > > Maybe I could help here a bit.
> > >
> > > lappy linux # addr2line -i -e vmlinux ffffffff8111d45f
> > > /usr/src/linux/kernel/timer.c:549
> > > /usr/src/linux/include/linux/jump_label.h:101
> > > /usr/src/linux/include/trace/events/timer.h:44
> > > /usr/src/linux/kernel/timer.c:601
> > > /usr/src/linux/kernel/timer.c:734
> > > /usr/src/linux/kernel/timer.c:886
> > >
> > > Which means that it was about to:
> > >
> > >         debug_object_activate(timer, &timer_debug_descr);
> 
> Understood and agreed, hence my severe diagnostic patch.
> 
> > I can't find anything in the debug object code that might fault.
> > I was suspecting some per-cpu allocated memory: per-cpu allocation
> > sometimes uses vmalloc,
> > which does lazy paging via faults. But I can't find any such thing there.
> > 
> > Maybe there is some faulting specific to KVM...
> 
> Sasha, is this easily reproducible?  If so, could you please try the
> previous patch?  It will likely give us more information on where
> this bug really lives.  (Yes, it might totally obscure the bug, but
> in that case we will just need to try some other perturbation.)

Isn't your patch actually removing the timer? If so, we won't fault there
anymore. Or maybe you want to check whether we also fault outside the timer?

Just in case, I'm posting a second patch that dumps the regs when we
fault in the middle of an RCU user mode API. This way we can find
the precise rip where we fault:

---
From db4ef9708e606754ac8a3f83b9f293383d263108 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@...il.com>
Date: Sat, 29 Sep 2012 14:16:09 +0200
Subject: [PATCH] rcu: Debug nasty rcu user mode API recursion

Add some debug code to chase down the origin of the fault.

Not-Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
---
 arch/x86/mm/fault.c      |    1 +
 include/linux/rcupdate.h |    1 +
 kernel/rcutree.c         |   32 ++++++++++++++++++++++++++++++++
 kernel/rcutree.h         |    1 +
 4 files changed, 35 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b23..a5f0eb5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1232,6 +1232,7 @@ good_area:
 dotraplinkage void __kprobes
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+	rcu_check_user_recursion(regs);
 	exception_enter(regs);
 	__do_page_fault(regs, error_code);
 	exception_exit(regs);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4..14ba908 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -199,6 +199,7 @@ extern void rcu_user_enter_after_irq(void);
 extern void rcu_user_exit_after_irq(void);
 extern void rcu_user_hooks_switch(struct task_struct *prev,
 				  struct task_struct *next);
+extern void rcu_check_user_recursion(struct pt_regs *regs);
 #else
 static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4fb2376..63b84f5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -405,6 +405,20 @@ void rcu_idle_enter(void)
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_RCU_USER_QS
+void rcu_check_user_recursion(struct pt_regs *regs)
+{
+	unsigned long flags;
+	static int printed;
+
+	local_irq_save(flags);
+	if (__this_cpu_read(rcu_dynticks.recursion) && !printed) {
+		printed = 1;
+		printk("Found recursion\n");
+		show_regs(regs);
+	}
+	local_irq_restore(flags);
+}
+
 /**
  * rcu_user_enter - inform RCU that we are resuming userspace.
  *
@@ -433,10 +447,20 @@ void rcu_user_enter(void)
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
+	if (WARN_ON_ONCE(rdtp->recursion)) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	rdtp->recursion = true;
+	barrier();
+
 	if (!rdtp->ignore_user_qs && !rdtp->in_user) {
 		rdtp->in_user = true;
 		rcu_eqs_enter(true);
 	}
+	rdtp->recursion = false;
+
 	local_irq_restore(flags);
 }
 
@@ -590,10 +614,18 @@ void rcu_user_exit(void)
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
+	if (WARN_ON_ONCE(rdtp->recursion)) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	rdtp->recursion = true;
+	barrier();
 	if (rdtp->in_user) {
 		rdtp->in_user = false;
 		rcu_eqs_exit(true);
 	}
+	rdtp->recursion = false;
 	local_irq_restore(flags);
 }
 
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 5faf05d..1bde9d5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -103,6 +103,7 @@ struct rcu_dynticks {
 	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 #ifdef CONFIG_RCU_USER_QS
+	bool recursion;
 	bool ignore_user_qs;	    /* Treat userspace as extended QS or not */
 	bool in_user;		    /* Is the CPU in userland from RCU POV? */
 #endif
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists