lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 16 Dec 2014 23:00:20 +0530
From:	Arun KS <arunks.linux@...il.com>
To:	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc:	Paul McKenney <paulmck@...ux.vnet.ibm.com>, josh@...htriplett.org,
	rostedt@...dmis.org,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	laijs@...fujitsu.com
Subject: Re: [RCU] kernel hangs in wait_rcu_gp during suspend path

Hello,

Adding some more info.

Below is the rcu_data data structure corresponding to cpu4.

struct rcu_data {
  completed = 5877,
  gpnum = 5877,
  passed_quiesce = true,
  qs_pending = false,
  beenonline = true,
  preemptible = true,
  mynode = 0xc117f340 <rcu_preempt_state>,
  grpmask = 16,
  nxtlist = 0xedaaec00,
  nxttail = {0xc54366c4, 0xe84d350c, 0xe84d350c, 0xe84d350c},
  nxtcompleted = {4294967035, 5878, 5878, 5878},
  qlen_lazy = 105,
  qlen = 415,
  qlen_last_fqs_check = 0,
  n_cbs_invoked = 86323,
  n_nocbs_invoked = 0,
  n_cbs_orphaned = 0,
  n_cbs_adopted = 139,
  n_force_qs_snap = 0,
  blimit = 10,
  dynticks = 0xc5436758,
  dynticks_snap = 7582140,
  dynticks_fqs = 41,
  offline_fqs = 0,
  n_rcu_pending = 59404,
  n_rp_qs_pending = 5,
  n_rp_report_qs = 4633,
  n_rp_cb_ready = 32,
  n_rp_cpu_needs_gp = 41088,
  n_rp_gp_completed = 2844,
  n_rp_gp_started = 1150,
  n_rp_need_nothing = 9657,
  barrier_head = {
    next = 0x0,
    func = 0x0
  },
  oom_head = {
    next = 0x0,
    func = 0x0
  },
  cpu = 4,
  rsp = 0xc117f340 <rcu_preempt_state>
}



Also pasting complete rcu_preempt_state.



rcu_preempt_state = $9 = {
  node = {{
      lock = {
        raw_lock = {
          {
            slock = 3129850509,
            tickets = {
              owner = 47757,
              next = 47757
            }
          }
        },
        magic = 3735899821,
        owner_cpu = 4294967295,
        owner = 0xffffffff
      },
      gpnum = 5877,
      completed = 5877,
      qsmask = 0,
      expmask = 0,
      qsmaskinit = 240,
      grpmask = 0,
      grplo = 0,
      grphi = 7,
      grpnum = 0 '\000',
      level = 0 '\000',
      parent = 0x0,
      blkd_tasks = {
        next = 0xc117f378 <rcu_preempt_state+56>,
        prev = 0xc117f378 <rcu_preempt_state+56>
      },
      gp_tasks = 0x0,
      exp_tasks = 0x0,
      need_future_gp = {1, 0},
      fqslock = {
        raw_lock = {
          {
            slock = 0,
            tickets = {
              owner = 0,
              next = 0
            }
          }
        },
        magic = 3735899821,
        owner_cpu = 4294967295,
        owner = 0xffffffff
      }
    }},
  level = {0xc117f340 <rcu_preempt_state>},
  levelcnt = {1, 0, 0, 0, 0},
  levelspread = "\b",
  rda = 0xc115e6b0 <rcu_preempt_data>,
  call = 0xc01975ac <call_rcu>,
  fqs_state = 0 '\000',
  boost = 0 '\000',
  gpnum = 5877,
  completed = 5877,
  gp_kthread = 0xf0c9e600,
  gp_wq = {
    lock = {
      {
        rlock = {
          raw_lock = {
            {
              slock = 2160230594,
              tickets = {
                owner = 32962,
                next = 32962
              }
            }
          },
          magic = 3735899821,
          owner_cpu = 4294967295,
          owner = 0xffffffff
        }
      }
    },
    task_list = {
      next = 0xf0cd1f20,
      prev = 0xf0cd1f20
    }
  },
  gp_flags = 1,
  orphan_lock = {
    raw_lock = {
      {
        slock = 327685,
        tickets = {
          owner = 5,
          next = 5
        }
      }
    },
    magic = 3735899821,
    owner_cpu = 4294967295,
    owner = 0xffffffff
  },
  orphan_nxtlist = 0x0,
  orphan_nxttail = 0xc117f490 <rcu_preempt_state+336>,
  orphan_donelist = 0x0,
  orphan_donetail = 0xc117f498 <rcu_preempt_state+344>,
  qlen_lazy = 0,
  qlen = 0,
  onoff_mutex = {
    count = {
      counter = 1
    },
    wait_lock = {
      {
        rlock = {
          raw_lock = {
            {
              slock = 811479134,
              tickets = {
                owner = 12382,
                next = 12382
              }
            }
          },
          magic = 3735899821,
          owner_cpu = 4294967295,
          owner = 0xffffffff
        }
      }
    },
    wait_list = {
      next = 0xc117f4bc <rcu_preempt_state+380>,
      prev = 0xc117f4bc <rcu_preempt_state+380>
    },
    owner = 0x0,
    name = 0x0,
    magic = 0xc117f4a8 <rcu_preempt_state+360>
  },
  barrier_mutex = {
    count = {
      counter = 1
    },
    wait_lock = {
      {
        rlock = {
          raw_lock = {
            {
              slock = 0,
              tickets = {
                owner = 0,
                next = 0
              }
            }
          },
          magic = 3735899821,
          owner_cpu = 4294967295,
          owner = 0xffffffff
        }
      }
    },
    wait_list = {
      next = 0xc117f4e4 <rcu_preempt_state+420>,
      prev = 0xc117f4e4 <rcu_preempt_state+420>
    },
    owner = 0x0,
    name = 0x0,
    magic = 0xc117f4d0 <rcu_preempt_state+400>
  },
  barrier_cpu_count = {
    counter = 0
  },
  barrier_completion = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              {
                slock = 0,
                tickets = {
                  owner = 0,
                  next = 0
                }
              }
            },
            magic = 0,
            owner_cpu = 0,
            owner = 0x0
          }
        }
      },
      task_list = {
        next = 0x0,
        prev = 0x0
      }
    }
  },
  n_barrier_done = 0,
  expedited_start = {
    counter = 0
  },
  expedited_done = {
    counter = 0
  },
  expedited_wrap = {
    counter = 0
  },
  expedited_tryfail = {
    counter = 0
  },
  expedited_workdone1 = {
    counter = 0
  },
  expedited_workdone2 = {
    counter = 0
  },
  expedited_normal = {
    counter = 0
  },
  expedited_stoppedcpus = {
    counter = 0
  },
  expedited_done_tries = {
    counter = 0
  },
  expedited_done_lost = {
    counter = 0
  },
  expedited_done_exit = {
    counter = 0
  },
  jiffies_force_qs = 4294963917,
  n_force_qs = 4028,
  n_force_qs_lh = 0,
  n_force_qs_ngp = 0,
  gp_start = 4294963911,
  jiffies_stall = 4294966011,
  gp_max = 17,
  name = 0xc0d833ab "rcu_preempt",
  abbr = 112 'p',
  flavors = {
    next = 0xc117f2ec <rcu_bh_state+556>,
    prev = 0xc117f300 <rcu_struct_flavors>
  },
  wakeup_work = {
    flags = 3,
    llnode = {
      next = 0x0
    },
    func = 0xc0195aa8 <rsp_wakeup>
  }
}

Hope this helps.

Thanks,
Arun


On Tue, Dec 16, 2014 at 11:59 AM, Arun KS <arunks.linux@...il.com> wrote:
> Hello,
>
> I dug a little deeper to understand the situation.
> All other cpus are in idle thread already.
> As per my understanding, for the grace period to end, at least one of
> the following should happen on all online cpus,
>
> 1. a context switch.
> 2. user space switch.
> 3. switch to idle thread.
>
> In this situation, since all the other cores are already in idle, none
> of the above are met on all online cores.
> So grace period is getting extended and never finishes. Below is the
> state of runqueue when the hang happens.
> --------------start------------------------------------
> crash> runq
> CPU 0 [OFFLINE]
>
> CPU 1 [OFFLINE]
>
> CPU 2 [OFFLINE]
>
> CPU 3 [OFFLINE]
>
> CPU 4 RUNQUEUE: c3192e40
>   CURRENT: PID: 0      TASK: f0874440  COMMAND: "swapper/4"
>   RT PRIO_ARRAY: c3192f20
>      [no tasks queued]
>   CFS RB_ROOT: c3192eb0
>      [no tasks queued]
>
> CPU 5 RUNQUEUE: c31a0e40
>   CURRENT: PID: 0      TASK: f0874980  COMMAND: "swapper/5"
>   RT PRIO_ARRAY: c31a0f20
>      [no tasks queued]
>   CFS RB_ROOT: c31a0eb0
>      [no tasks queued]
>
> CPU 6 RUNQUEUE: c31aee40
>   CURRENT: PID: 0      TASK: f0874ec0  COMMAND: "swapper/6"
>   RT PRIO_ARRAY: c31aef20
>      [no tasks queued]
>   CFS RB_ROOT: c31aeeb0
>      [no tasks queued]
>
> CPU 7 RUNQUEUE: c31bce40
>   CURRENT: PID: 0      TASK: f0875400  COMMAND: "swapper/7"
>   RT PRIO_ARRAY: c31bcf20
>      [no tasks queued]
>   CFS RB_ROOT: c31bceb0
>      [no tasks queued]
> --------------end------------------------------------
>
> If my understanding is correct the below patch should help, because it
> will expedite grace periods during suspend,
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d1d74d14e98a6be740a6f12456c7d9ad47be9c9c
>
> But I wonder why it was not taken to stable trees. Can we take it?
> Appreciate your help.
>
> Thanks,
> Arun
>
> On Mon, Dec 15, 2014 at 10:34 PM, Arun KS <arunks.linux@...il.com> wrote:
>> Hi,
>>
>> Here is the backtrace of the process hanging in wait_rcu_gp,
>>
>> PID: 247    TASK: e16e7380  CPU: 4   COMMAND: "kworker/u16:5"
>>  #0 [<c09fead0>] (__schedule) from [<c09fcab0>]
>>  #1 [<c09fcab0>] (schedule_timeout) from [<c09fe050>]
>>  #2 [<c09fe050>] (wait_for_common) from [<c013b2b4>]
>>  #3 [<c013b2b4>] (wait_rcu_gp) from [<c0142f50>]
>>  #4 [<c0142f50>] (atomic_notifier_chain_unregister) from [<c06b2ab8>]
>>  #5 [<c06b2ab8>] (cpufreq_interactive_disable_sched_input) from [<c06b32a8>]
>>  #6 [<c06b32a8>] (cpufreq_governor_interactive) from [<c06abbf8>]
>>  #7 [<c06abbf8>] (__cpufreq_governor) from [<c06ae474>]
>>  #8 [<c06ae474>] (__cpufreq_remove_dev_finish) from [<c06ae8c0>]
>>  #9 [<c06ae8c0>] (cpufreq_cpu_callback) from [<c0a0185c>]
>> #10 [<c0a0185c>] (notifier_call_chain) from [<c0121888>]
>> #11 [<c0121888>] (__cpu_notify) from [<c0121a04>]
>> #12 [<c0121a04>] (cpu_notify_nofail) from [<c09ee7f0>]
>> #13 [<c09ee7f0>] (_cpu_down) from [<c0121b70>]
>> #14 [<c0121b70>] (disable_nonboot_cpus) from [<c016788c>]
>> #15 [<c016788c>] (suspend_devices_and_enter) from [<c0167bcc>]
>> #16 [<c0167bcc>] (pm_suspend) from [<c0167d94>]
>> #17 [<c0167d94>] (try_to_suspend) from [<c0138460>]
>> #18 [<c0138460>] (process_one_work) from [<c0138b18>]
>> #19 [<c0138b18>] (worker_thread) from [<c013dc58>]
>> #20 [<c013dc58>] (kthread) from [<c01061b8>]
>>
>> Will this patch helps here,
>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d1d74d14e98a6be740a6f12456c7d9ad47be9c9c
>>
>> I couldn't really understand why it got stuck in synchronize_rcu().
>> Please give some pointers to debug this further.
>>
>> Below are the enabled configs related to RCU.
>>
>> CONFIG_TREE_PREEMPT_RCU=y
>> CONFIG_PREEMPT_RCU=y
>> CONFIG_RCU_STALL_COMMON=y
>> CONFIG_RCU_FANOUT=32
>> CONFIG_RCU_FANOUT_LEAF=16
>> CONFIG_RCU_FAST_NO_HZ=y
>> CONFIG_RCU_CPU_STALL_TIMEOUT=21
>> CONFIG_RCU_CPU_STALL_VERBOSE=y
>>
>> Kernel version is 3.10.28
>> Architecture is ARM
>>
>> Thanks,
>> Arun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ