lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 26 Jan 2022 02:34:31 +0000
From:   "Zhang, Qiang1" <qiang1.zhang@...el.com>
To:     "paulmck@...nel.org" <paulmck@...nel.org>,
        Ammar Faizi <ammarfaizi2@...weeb.org>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall
 warnings


On Tue, Jan 25, 2022 at 02:04:04PM +0700, Ammar Faizi wrote:
> On 1/25/22 9:47 AM, Zqiang wrote:
> > When the 'use_softirq' be set zero, all RCU_SOFTIRQ processing be 
> > moved to per-CPU rcuc kthreads, if the rcuc kthreads is being 
> > starved, quiescent state can not report in time. the RCU stall may 
> > be triggered. this commit adds a stack trace of this CPU and dump 
> > rcuc kthreads stack to help analyze what prevents rcuc kthreads from 
> > running.
> > 
> > Suggested-by: Ammar Faizi <ammarfaizi2@...weeb.org>
> > Signed-off-by: Zqiang <qiang1.zhang@...el.com>
> > ---
> 
> For 
> https://lore.kernel.org/lkml/20220125024744.4186726-1-qiang1.zhang@int
> el.com/T/
> 
> ```
> 
> Reviewed-by: Ammar Faizi <ammarfaizi2@...weeb.org>
> 
> ```

>I queued this wordsmithed as shown below.  Please check to see if I messed anything up.  And thank you both!
>
>							Thanx, Paul
>

Thanks Paul,  this description is clearer.


>------------------------------------------------------------------------
>
>commit 72bdc0a6f8cfb7cc56238dc850b7fe236e7371cb
>Author: Zqiang <qiang1.zhang@...el.com>
>Date:   Tue Jan 25 10:47:44 2022 +0800
>
>    rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
>    
>    When the rcutree.use_softirq kernel boot parameter is set to zero, all
>    RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
>    If these kthreads are being starved, quiescent states will not be
>    reported, which in turn means that the grace period will not end, which
>    can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
>    stack traces of stalled CPUs' rcuc kthreads, which can help identify
>    what is preventing those kthreads from running.
>    
>    Suggested-by: Ammar Faizi <ammarfaizi2@...weeb.org>
>    Reviewed-by: Ammar Faizi <ammarfaizi2@...weeb.org>
>    Signed-off-by: Zqiang <qiang1.zhang@...el.com>
>    Signed-off-by: Paul E. McKenney <paulmck@...nel.org>

>diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f071c49b37118..e6ad532cffe78 100644
>--- a/kernel/rcu/tree.c
>+++ b/kernel/rcu/tree.c
>@@ -2859,10 +2859,12 @@ static void rcu_cpu_kthread(unsigned int cpu)  {
> 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
>+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
> 	int spincnt;
> 
> 	trace_rcu_utilization(TPS("Start CPU kthread@..._run"));
> 	for (spincnt = 0; spincnt < 10; spincnt++) {
>+		WRITE_ONCE(*j, jiffies);
> 		local_bh_disable();
> 		*statusp = RCU_KTHREAD_RUNNING;
> 		local_irq_disable();
>@@ -2883,6 +2885,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
> 	schedule_timeout_idle(2);
> 	trace_rcu_utilization(TPS("End CPU kthread@..._yield"));
>	*statusp = RCU_KTHREAD_WAITING;
>+	WRITE_ONCE(*j, jiffies);
> }
> 
> static struct smp_hotplug_thread rcu_cpu_thread_spec = { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a87f31b1701db..926673ebe355f 100644
>--- a/kernel/rcu/tree.h
>+++ b/kernel/rcu/tree.h
>@@ -241,6 +241,7 @@ struct rcu_data {
> 					/* rcuc per-CPU kthread or NULL. */
> 	unsigned int rcu_cpu_kthread_status;
> 	char rcu_cpu_has_work;
>+	unsigned long rcuc_activity;
> 
> 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
> 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
>diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 4781aa338c7c1..6082dd23408f8 100644
>--- a/kernel/rcu/tree_plugin.h
>+++ b/kernel/rcu/tree_plugin.h
>@@ -994,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  */
> static void rcu_cpu_kthread_setup(unsigned int cpu)  {
>+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> #ifdef CONFIG_RCU_BOOST
> 	struct sched_param sp;
> 
> 	sp.sched_priority = kthread_prio;
> 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);  #endif /* #ifdef CONFIG_RCU_BOOST */
>+
>+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
> }
 >
> #ifdef CONFIG_RCU_BOOST
>diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c9030b..0c5d8516516af 100644
>--- a/kernel/rcu/tree_stall.h
>+++ b/kernel/rcu/tree_stall.h
>@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
> 	return j > 2 * HZ;
> }
> 
>+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned 
>+long *jp) {
>+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
>+
>+	if (jp)
>+		*jp = j;
>+	return j > 2 * HZ;
>+}
>+
> /*
>  * Print out diagnostic information for the specified stalled CPU.
>  *
>@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
> 	       falsepositive ? " (false positive?)" : "");  }
> 
>+static void rcuc_kthread_dump(struct rcu_data *rdp) {
>+	int cpu;
>+	unsigned long j;
>+	struct task_struct *rcuc;
>+
>+	rcuc = rdp->rcu_cpu_kthread_task;
>+	if (!rcuc)
>+		return;
>+
>+	cpu = task_cpu(rcuc);
>+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
>+		return;
>+
>+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>+		return;
>+
>+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>+	sched_show_task(rcuc);
>+	if (!trigger_single_cpu_backtrace(cpu))
>+		dump_cpu_task(cpu);
>+}
>+
> /* Complain about starvation of grace-period kthread.  */  static void rcu_check_gp_kthread_starvation(void)
> {
>@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
>	rcu_check_gp_kthread_expired_fqs_timer();
> 	rcu_check_gp_kthread_starvation();
> 
>+	if (!use_softirq)
>+		rcuc_kthread_dump(rdp);
>+
> 	rcu_dump_cpu_stacks();
> 
> 	raw_spin_lock_irqsave_rcu_node(rnp, flags);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ