lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <PH0PR11MB5880C3206B4BB782188844BDDAF19@PH0PR11MB5880.namprd11.prod.outlook.com>
Date:   Sat, 16 Apr 2022 03:08:47 +0000
From:   "Zhang, Qiang1" <qiang1.zhang@...el.com>
To:     "paulmck@...nel.org" <paulmck@...nel.org>
CC:     "frederic@...nel.org" <frederic@...nel.org>,
        "rcu@...r.kernel.org" <rcu@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH] rcu: Dump all rcuc kthreads status for CPUs that not
 report quiescent state

On Wed, Apr 13, 2022 at 03:44:11PM +0800, Zqiang wrote:
> If the rcutree.use_softirq is configured, when RCU Stall event 
> happened, dump status of all rcuc kthreads who due to starvation 
> prevented grace period ends on CPUs that not report quiescent state.
> 
> Signed-off-by: Zqiang <qiang1.zhang@...el.com>

>This could be a good improvement!  But a few questions and comments below.
>
>							Thanx, Paul

> ---
>  kernel/rcu/tree_stall.h | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 
> d7956c03edbd..e6ecc32cfe23 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -348,6 +348,7 @@ static int rcu_print_task_stall(struct rcu_node 
> *rnp, unsigned long flags)  }  #endif /* #else #ifdef 
> CONFIG_PREEMPT_RCU */
>  
> +static void rcuc_kthread_dump(struct rcu_data *rdp);
>  /*
>   * Dump stacks of all tasks running on stalled CPUs.  First try using
>   * NMIs, but fall back to manual remote stack tracing on 
> architectures @@ -368,6 +369,7 @@ static void rcu_dump_cpu_stacks(void)
>  					pr_err("Offline CPU %d blocking current GP.\n", cpu);
>  				else if (!trigger_single_cpu_backtrace(cpu))
>  					dump_cpu_task(cpu);
> +				rcuc_kthread_dump(per_cpu_ptr(&rcu_data, cpu));

>Was this addition inspired by the chasing of a bug?  If so, please let me know exactly what information was (or would have been) the most helpful.
>
>For example, the starvation information might be more compactly represented in the information printed by print_cpu_stall_info().
>Unless the stack trace was quite valuable, it might be best to omit it.
>After all, RCU CPU stall warnings are already infamously heavy on the text output.

Is it better to modify it like this?


diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index d7956c03edbd..37444ff00787 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -465,27 +465,38 @@ static void print_cpu_stall_info(int cpu)
               falsepositive ? " (false positive?)" : "");
 }

-static void rcuc_kthread_dump(struct rcu_data *rdp)
+static void rcuc_kthread_dump(void)
 {
        int cpu;
        unsigned long j;
+       unsigned long flags
+       struct rcu_node *rnp;
        struct task_struct *rcuc;
+       struct rcu_data *rdp;

-       rcuc = rdp->rcu_cpu_kthread_task;
-       if (!rcuc)
-               return;
+       rcu_for_each_leaf_node(rnp) {
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
+               for_each_leaf_node_possible_cpu(rnp, cpu)
+                       if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
+                               rdp = per_cpu_ptr(&rcu_data, cpu);
+                               rcuc = rdp->rcu_cpu_kthread_task;
+                               if (!rcuc)
+                                       return;

-       cpu = task_cpu(rcuc);
-       if (cpu_is_offline(cpu) || idle_cpu(cpu))
-               return;
+                               cpu = task_cpu(rcuc);
+                               if (cpu_is_offline(cpu) || idle_cpu(cpu))
+                                       return;

-       if (!rcu_is_rcuc_kthread_starving(rdp, &j))
-               return;
+                               if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+                                       return;

-       pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
-       sched_show_task(rcuc);
-       if (!trigger_single_cpu_backtrace(cpu))
-               dump_cpu_task(cpu);
+                               pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+                               sched_show_task(rcuc);
+                               if (!trigger_single_cpu_backtrace(cpu))
+                                       dump_cpu_task(cpu);
+                       }
+               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+       }
 }

/* Complain about starvation of grace-period kthread.  */
@@ -595,6 +606,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
               smp_processor_id(), (long)(jiffies - gps),
               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
        if (ndetected) {
+               if (!use_softirq)
+                       rcuc_kthread_dump();
+
                rcu_dump_cpu_stacks();

                /* Complain about tasks blocking the grace period. */
@@ -660,7 +674,7 @@ static void print_cpu_stall(unsigned long gps)
        rcu_check_gp_kthread_starvation();

        if (!use_softirq)
-               rcuc_kthread_dump(rdp);
+               rcuc_kthread_dump();

        rcu_dump_cpu_stacks();

Thanks
Zqiang


>  			}
>  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  	}
> @@ -471,6 +473,9 @@ static void rcuc_kthread_dump(struct rcu_data *rdp)
>  	unsigned long j;
>  	struct task_struct *rcuc;
>  
> +	if (use_softirq)
> +		return;
> +
>  	rcuc = rdp->rcu_cpu_kthread_task;
>  	if (!rcuc)

>The checks for use_softirq on the one hand and the checks for non-NULL
>rdp->rcu_cpu_kthread_task are a bit "interesting".  Not your fault or
>problem, of course!

Yes the ' if (!use_softirq)' is redundant,  the if rcu_cpu_kthread_task  Is completely enough

>
>  		return;
> @@ -659,9 +664,6 @@ static void print_cpu_stall(unsigned long gps)
>  	rcu_check_gp_kthread_expired_fqs_timer();
>  	rcu_check_gp_kthread_starvation();
>  
> -	if (!use_softirq)
>
>In particular, I am wondering if this "if" is redundant.

Yes the ' if (!use_softirq)' is also redundant

>
> -		rcuc_kthread_dump(rdp);
> -
>  	rcu_dump_cpu_stacks();
>  
>  	raw_spin_lock_irqsave_rcu_node(rnp, flags);
> --
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ