[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <PH0PR11MB5880D8A33295BFDC6AC557A8DAF29@PH0PR11MB5880.namprd11.prod.outlook.com>
Date: Tue, 19 Apr 2022 03:21:53 +0000
From: "Zhang, Qiang1" <qiang1.zhang@...el.com>
To: "Zhang, Qiang1" <qiang1.zhang@...el.com>,
"paulmck@...nel.org" <paulmck@...nel.org>
CC: "frederic@...nel.org" <frederic@...nel.org>,
"rcu@...r.kernel.org" <rcu@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH] rcu: Dump all rcuc kthreads status for CPUs that not
report quiescent state
On Wed, Apr 13, 2022 at 03:44:11PM +0800, Zqiang wrote:
> If the rcutree.use_softirq is configured, when RCU Stall event
> happened, dump status of all rcuc kthreads who due to starvation
> prevented grace period ends on CPUs that not report quiescent state.
>
> Signed-off-by: Zqiang <qiang1.zhang@...el.com>
>This could be a good improvement! But a few questions and comments below.
>
> Thanx, Paul
> ---
> kernel/rcu/tree_stall.h | 8 +++++---
> 1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index
> d7956c03edbd..e6ecc32cfe23 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -348,6 +348,7 @@ static int rcu_print_task_stall(struct rcu_node
> *rnp, unsigned long flags) } #endif /* #else #ifdef
> CONFIG_PREEMPT_RCU */
>
> +static void rcuc_kthread_dump(struct rcu_data *rdp);
> /*
> * Dump stacks of all tasks running on stalled CPUs. First try using
> * NMIs, but fall back to manual remote stack tracing on
> architectures @@ -368,6 +369,7 @@ static void rcu_dump_cpu_stacks(void)
> pr_err("Offline CPU %d blocking current GP.\n", cpu);
> else if (!trigger_single_cpu_backtrace(cpu))
> dump_cpu_task(cpu);
> + rcuc_kthread_dump(per_cpu_ptr(&rcu_data, cpu));
>Was this addition inspired by the chasing of a bug? If so, please let me know exactly what information was (or would have been) the most helpful.
>
>For example, the starvation information might be more compactly represented in the information printed by print_cpu_stall_info().
>Unless the stack trace was quite valuable, it might be best to omit it.
>After all, RCU CPU stall warnings are already infamously heavy on the text output.
>Is it better to modify it like this?
I'm so sorry, the following modifications are problematic, I'll resend v2
Thanks
Zqiang
>diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index d7956c03edbd..37444ff00787 100644
>--- a/kernel/rcu/tree_stall.h
>+++ b/kernel/rcu/tree_stall.h
>@@ -465,27 +465,38 @@ static void print_cpu_stall_info(int cpu)
> falsepositive ? " (false positive?)" : ""); }
>
>-static void rcuc_kthread_dump(struct rcu_data *rdp)
>+static void rcuc_kthread_dump(void)
> {
> int cpu;
> unsigned long j;
>+ unsigned long flags
>+ struct rcu_node *rnp;
> struct task_struct *rcuc;
>+ struct rcu_data *rdp;
>
>- rcuc = rdp->rcu_cpu_kthread_task;
>- if (!rcuc)
>- return;
>+ rcu_for_each_leaf_node(rnp) {
>+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
>+ for_each_leaf_node_possible_cpu(rnp, cpu)
>+ if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
>+ rdp = per_cpu_ptr(&rcu_data, cpu);
>+ rcuc = rdp->rcu_cpu_kthread_task;
>+ if (!rcuc)
>+ return;
>
>- cpu = task_cpu(rcuc);
>- if (cpu_is_offline(cpu) || idle_cpu(cpu))
>- return;
>+ cpu = task_cpu(rcuc);
>+ if (cpu_is_offline(cpu) || idle_cpu(cpu))
>+ return;
>
>- if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>- return;
>+ if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>+ return;
>
>- pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>- sched_show_task(rcuc);
>- if (!trigger_single_cpu_backtrace(cpu))
>- dump_cpu_task(cpu);
>+ pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>+ sched_show_task(rcuc);
>+ if (!trigger_single_cpu_backtrace(cpu))
>+ dump_cpu_task(cpu);
>+ }
>+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>+ }
> }
>
>/* Complain about starvation of grace-period kthread. */ @@ -595,6 +606,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
> smp_processor_id(), (long)(jiffies - gps),
> (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
> if (ndetected) {
>+ if (!use_softirq)
>+ rcuc_kthread_dump();
>+
> rcu_dump_cpu_stacks();
>
> /* Complain about tasks blocking the grace period. */ @@ -660,7 +674,7 @@ static void print_cpu_stall(unsigned long gps)
> rcu_check_gp_kthread_starvation();
>
> if (!use_softirq)
>- rcuc_kthread_dump(rdp);
>+ rcuc_kthread_dump();
>
> rcu_dump_cpu_stacks();
>
>Thanks
>Zqiang
> }
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> }
> @@ -471,6 +473,9 @@ static void rcuc_kthread_dump(struct rcu_data *rdp)
> unsigned long j;
> struct task_struct *rcuc;
>
> + if (use_softirq)
> + return;
> +
> rcuc = rdp->rcu_cpu_kthread_task;
> if (!rcuc)
>The checks for use_softirq on the one hand and the checks for non-NULL
>rdp->rcu_cpu_kthread_task are a bit "interesting". Not your fault or
>problem, of course!
>Yes the ' if (!use_softirq)' is redundant, the if rcu_cpu_kthread_task Is completely enough
>
> return;
> @@ -659,9 +664,6 @@ static void print_cpu_stall(unsigned long gps)
> rcu_check_gp_kthread_expired_fqs_timer();
> rcu_check_gp_kthread_starvation();
>
> - if (!use_softirq)
>
>In particular, I am wondering if this "if" is redundant.
>Yes the ' if (!use_softirq)' is also redundant
>
> - rcuc_kthread_dump(rdp);
> -
> rcu_dump_cpu_stacks();
>
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> --
> 2.25.1
>
Powered by blists - more mailing lists