lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <4F1FCF02.9060209@numascale-asia.com>
Date:	Wed, 25 Jan 2012 17:44:34 +0800
From:	Daniel J Blueman <daniel@...ascale-asia.com>
To:	Paul E McKenney <paulmck@...ux.vnet.ibm.com>,
	Dipankar Sarma <dipankar@...ibm.com>
CC:	Steffen Persvold <sp@...ascale.com>, linux-kernel@...r.kernel.org,
	x86@...nel.org
Subject: RCU qsmask !=0 warnings on large-SMP...

During validation of an interconnect which extends the AMD Opteron 
HyperTransport to many servers (Numascale NumaConnect), we observe RCU 
warnings [1] when larger systems are under specific workloads, and 
subsequent hanging.

This is reproducable via RCU-intensive activities, eg the rcutorture 
module (let me know if interested in another reproducer 'bigmap' that 
hits the spot) on eg stock 3.1.8 and 3.2.1. Reproducing on a subset of 
cores for clarity and with further debug [2], we observe struct 
rcu_node's qsmask bitfield not being cleared on typically three tree 
nodes when expected to be [3] and we see the RCU grace periods stop 
incrementing.

Has this been encountered previously?

Many thanks,
   Daniel

--- [1]

WARNING: at kernel/rcutree_plugin.h:990 
rcu_preempt_check_blocked_tasks+0x27/0x30()
Hardware name: ProLiant DL165 G6
Modules linked in:
Pid: 21880, comm: bigmap Not tainted 3.1.8-numaconnect10+ #5
Call Trace:
<IRQ>  [<ffffffff810b9027>] ? rcu_preempt_check_blocked_tasks+0x27/0x30
  [<ffffffff8106e03b>] warn_slowpath_common+0x8b/0xc0
  [<ffffffff810c5901>] ? perf_event_task_tick+0x71/0xa0
  [<ffffffff8106e085>] warn_slowpath_null+0x15/0x20
  [<ffffffff810b9027>] rcu_preempt_check_blocked_tasks+0x27/0x30
  [<ffffffff810b9193>] rcu_start_gp+0x163/0x200
  [<ffffffff810ba0eb>] __rcu_process_callbacks+0x8b/0xd0
  [<ffffffff810ba150>] rcu_process_callbacks+0x20/0x50
  [<ffffffff8107422d>] __do_softirq+0x9d/0x140
  [<ffffffff815be92c>] call_softirq+0x1c/0x30
  [<ffffffff8103478a>] do_softirq+0x4a/0x80
  [<ffffffff81074605>] irq_exit+0xa5/0xb0
  [<ffffffff8104a005>] smp_apic_timer_interrupt+0x45/0x60
  [<ffffffff815bd44b>] apic_timer_interrupt+0x6b/0x70
<EOI>  [<ffffffff810b97e0>] ? call_rcu_sched+0x10/0x20
  [<ffffffff8113ce60>] ? fsnotify+0x70/0x2d0
  [<ffffffff81121b43>] ? iput_final+0xe3/0x150
  [<ffffffff81102b45>] ? kmem_cache_free+0x15/0xa0
  [<ffffffff81125d2a>] ? vfsmount_lock_global_lock_online+0x4a/0xc0
  [<ffffffff8110b9ed>] __fput+0x17d/0x1f0
  [<ffffffff8110bc07>] fput+0x17/0x20
  [<ffffffff810ed856>] remove_vma+0x36/0x70
  [<ffffffff810ed97e>] exit_mmap+0xee/0x120
  [<ffffffff8106c414>] mmput+0x44/0xf0
  [<ffffffff81070df9>] exit_mm+0x109/0x140
  [<ffffffff810724d6>] do_exit+0x1b6/0x3d0
  [<ffffffff8107bc8b>] ? __dequeue_signal+0x5b/0xa0
  [<ffffffff8107272c>] do_group_exit+0x3c/0xa0
  [<ffffffff8107e970>] get_signal_to_deliver+0x1a0/0x320
  [<ffffffff8103273c>] do_signal+0x4c/0x120
  [<ffffffff8107d05d>] ? kill_something_info+0x3d/0x130
  [<ffffffff8107dc1b>] ? sys_kill+0x7b/0x90
  [<ffffffff8103287d>] do_notify_resume+0x6d/0x80
  [<ffffffff815bcc5b>] int_signal+0x12/0x17
---[ end trace 826a49848cffd17f ]---

--- [2]

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6b76d81..7875427 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -828,6 +828,39 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct 
rcu_node *rnp, struct rcu_dat
         __note_new_gpnum(rsp, rnp, rdp);
  }

+static void
+_debug_print(struct rcu_data *rdp)
+{
+       printk(KERN_ERR "%3d %p c=%lu g=%lu pq=%d pgp=%lu qp=%d of=%lu 
ri=%lu ql=%ld qs=%c%c%c%c b=%ld ci=%lu co=%lu ca=%lu\n",
+              rdp->cpu, rdp,
+              rdp->completed, rdp->gpnum,
+              rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
+              rdp->qs_pending,
+              rdp->offline_fqs, rdp->resched_ipi,
+              rdp->qlen,
+              ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
+                   rdp->nxttail[RCU_NEXT_TAIL]],
+              ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
+                   rdp->nxttail[RCU_NEXT_READY_TAIL]],
+              ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
+                   rdp->nxttail[RCU_WAIT_TAIL]],
+              ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
+              rdp->blimit, rdp->n_cbs_invoked, rdp->n_cbs_orphaned, 
rdp->n_cbs_adopted);
+}
+
+static void
+rcu_debug_print(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+       int cpu;
+
+       if (rnp->qsmask) {
+               printk(KERN_ERR "CPU %d, treason uncloaked, rsp @ %p 
(%s), rsp->gpnum is %lu, rnp @ %p, qsmask is 0x%lx\n",
+                      smp_processor_id(), rsp, rsp->name, rsp->gpnum, 
rnp, rnp->qsmask);
+               for_each_online_cpu(cpu)
+                       _debug_print(per_cpu_ptr(rsp->rda, cpu));
+       }
+}
+
  /*
   * Start a new RCU grace period if warranted, re-initializing the 
hierarchy
   * in preparation for detecting the next grace period.  The caller 
must hold
@@ -873,6 +906,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)

         /* Special-case the common single-level case. */
         if (NUM_RCU_NODES == 1) {
+               rcu_debug_print(rsp, rnp);
                 rcu_preempt_check_blocked_tasks(rnp);
                 rnp->qsmask = rnp->qsmaskinit;
                 rnp->gpnum = rsp->gpnum;
@@ -912,6 +946,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
          */
         rcu_for_each_node_breadth_first(rsp, rnp) {
                 raw_spin_lock(&rnp->lock);      /* irqs already 
disabled. */
+               rcu_debug_print(rsp, rnp);
                 rcu_preempt_check_blocked_tasks(rnp);
                 rnp->qsmask = rnp->qsmaskinit;
                 rnp->gpnum = rsp->gpnum;

--- [3]

CPU 48, treason uncloaked, rsp @ ffffffff81a1d000 (rcu_sched), 
rsp->gpnum is 4058, rsp->completed is 4057, rnp @ ffffffff81a1d000, 
qsmask is 0x1
   0 ffff8803f840d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=0 ri=279 ql=1 
qs=..W. b=10 ci=145949 co=0 ca=0
  12 ffff880bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=7 ri=10 ql=2 
qs=.R.. b=10 ci=561 co=0 ca=0
  24 ffff8813d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=14 ql=0 
qs=.... b=10 ci=406 co=0 ca=0
  36 ffff881bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=5 ri=22 ql=0 
qs=.... b=10 ci=215 co=0 ca=0
  48 ffff8823d040d4e0 c=4057 g=4057 pq=1 pgp=4057 qp=0 of=6 ri=14 ql=26 
qs=.RWD b=10 ci=7345 co=0 ca=0
  60 ffff882bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=16 ql=0 
qs=.... b=10 ci=50 co=0 ca=0
  72 ffff8833d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=26 ql=0 
qs=.... b=10 ci=22 co=0 ca=0
------------[ cut here ]------------
WARNING: at kernel/rcutree_plugin.h:1011 
rcu_preempt_check_blocked_tasks+0x27/0x30()
Hardware name: H8QI6
Modules linked in: rcutorture
Pid: 4562, comm: rcu_torture_rea Not tainted 3.2.1-numaconnect10+ #59
Call Trace:
<IRQ>  [<ffffffff810bb8e7>] ? rcu_preempt_check_blocked_tasks+0x27/0x30
  [<ffffffff8106f74b>] warn_slowpath_common+0x8b/0xc0
  [<ffffffff8106f795>] warn_slowpath_null+0x15/0x20
  [<ffffffff810bb8e7>] rcu_preempt_check_blocked_tasks+0x27/0x30
  [<ffffffff810bb9f2>] rcu_start_gp+0x102/0x1b0
  [<ffffffff810bc60b>] __rcu_process_callbacks+0x8b/0xd0
  [<ffffffff810bce70>] rcu_process_callbacks+0x20/0x40
  [<ffffffff81075b3d>] __do_softirq+0x9d/0x140
  [<ffffffffa0001530>] ? rcu_torture_shuffle+0x80/0x80 [rcutorture]
  [<ffffffff815da36c>] call_softirq+0x1c/0x30
  [<ffffffff8103451a>] do_softirq+0x4a/0x80
  [<ffffffff81075eb3>] irq_exit+0x43/0x60
  [<ffffffff8104af95>] smp_apic_timer_interrupt+0x45/0x60
  [<ffffffff815d8e8b>] apic_timer_interrupt+0x6b/0x70
<EOI>  [<ffffffff815d58bf>] ? __schedule+0x34f/0x710
  [<ffffffff810618a5>] ? update_curr+0x85/0xd0
  [<ffffffff81090735>] ? sched_clock_local+0x15/0x80
  [<ffffffff81090865>] ? sched_clock_cpu+0x65/0x90
  [<ffffffffa0001530>] ? rcu_torture_shuffle+0x80/0x80 [rcutorture]
  [<ffffffff815d5f5a>] schedule+0x3a/0x60
  [<ffffffffa0001660>] rcu_torture_reader+0x130/0x230 [rcutorture]
  [<ffffffffa0001de0>] ? rcu_torture_writer+0x160/0x160 [rcutorture]
  [<ffffffffa0001530>] ? rcu_torture_shuffle+0x80/0x80 [rcutorture]
  [<ffffffff8108aa26>] kthread+0x96/0xa0
  [<ffffffff815da274>] kernel_thread_helper+0x4/0x10
  [<ffffffff8108a990>] ? kthread_stop+0x70/0x70
  [<ffffffff815da270>] ? gs_change+0xb/0xb
---[ end trace 19f7bd7f233f1506 ]---
CPU 48, treason uncloaked, rsp @ ffffffff81a1d000 (rcu_sched), 
rsp->gpnum is 4058, rsp->completed is 4057, rnp @ ffffffff81a1d100, 
qsmask is 0x8
   0 ffff8803f840d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=0 ri=279 ql=1 
qs=..W. b=10 ci=145949 co=0 ca=0
  12 ffff880bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=7 ri=10 ql=3 
qs=NR.. b=10 ci=561 co=0 ca=0
  24 ffff8813d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=14 ql=0 
qs=.... b=10 ci=406 co=0 ca=0
  36 ffff881bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=5 ri=22 ql=0 
qs=.... b=10 ci=215 co=0 ca=0
  48 ffff8823d040d4e0 c=4057 g=4057 pq=1 pgp=4057 qp=0 of=6 ri=19 ql=26 
qs=.RWD b=10 ci=7345 co=0 ca=0
  60 ffff882bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=16 ql=0 
qs=.... b=10 ci=50 co=0 ca=0
  72 ffff8833d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=26 ql=0 
qs=.... b=10 ci=22 co=0 ca=0
CPU 48, treason uncloaked, rsp @ ffffffff81a1d000 (rcu_sched), 
rsp->gpnum is 4058, rsp->completed is 4057, rnp @ ffffffff81a1d800, 
qsmask is 0x1
   0 ffff8803f840d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=0 ri=280 ql=1 
qs=..W. b=10 ci=145949 co=0 ca=0
  12 ffff880bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=7 ri=11 ql=3 
qs=NR.. b=10 ci=561 co=0 ca=0
  24 ffff8813d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=15 ql=0 
qs=.... b=10 ci=406 co=0 ca=0
  36 ffff881bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=5 ri=23 ql=0 
qs=.... b=10 ci=215 co=0 ca=0
  48 ffff8823d040d4e0 c=4057 g=4057 pq=1 pgp=4057 qp=0 of=6 ri=21 ql=26 
qs=.RWD b=10 ci=7345 co=0 ca=0
  60 ffff882bd040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=16 ql=0 
qs=.... b=10 ci=50 co=0 ca=0
  72 ffff8833d040d4e0 c=4057 g=4058 pq=1 pgp=4058 qp=0 of=6 ri=26 ql=0 
qs=.... b=10 ci=22 co=0 ca=0

-- 
Daniel J Blueman
Principal Software Engineer, Numascale Asia

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ