lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <PH0PR11MB5880D8A33295BFDC6AC557A8DAF29@PH0PR11MB5880.namprd11.prod.outlook.com>
Date:   Tue, 19 Apr 2022 03:21:53 +0000
From:   "Zhang, Qiang1" <qiang1.zhang@...el.com>
To:     "Zhang, Qiang1" <qiang1.zhang@...el.com>,
        "paulmck@...nel.org" <paulmck@...nel.org>
CC:     "frederic@...nel.org" <frederic@...nel.org>,
        "rcu@...r.kernel.org" <rcu@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH] rcu: Dump all rcuc kthreads status for CPUs that not
 report quiescent state

On Wed, Apr 13, 2022 at 03:44:11PM +0800, Zqiang wrote:
> If the rcutree.use_softirq is configured, when RCU Stall event 
> happened, dump status of all rcuc kthreads who due to starvation 
> prevented grace period ends on CPUs that not report quiescent state.
> 
> Signed-off-by: Zqiang <qiang1.zhang@...el.com>

>This could be a good improvement!  But a few questions and comments below.
>
>							Thanx, Paul

> ---
>  kernel/rcu/tree_stall.h | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index
> d7956c03edbd..e6ecc32cfe23 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -348,6 +348,7 @@ static int rcu_print_task_stall(struct rcu_node 
> *rnp, unsigned long flags)  }  #endif /* #else #ifdef 
> CONFIG_PREEMPT_RCU */
>  
> +static void rcuc_kthread_dump(struct rcu_data *rdp);
>  /*
>   * Dump stacks of all tasks running on stalled CPUs.  First try using
>   * NMIs, but fall back to manual remote stack tracing on 
> architectures @@ -368,6 +369,7 @@ static void rcu_dump_cpu_stacks(void)
>  					pr_err("Offline CPU %d blocking current GP.\n", cpu);
>  				else if (!trigger_single_cpu_backtrace(cpu))
>  					dump_cpu_task(cpu);
> +				rcuc_kthread_dump(per_cpu_ptr(&rcu_data, cpu));

>Was this addition inspired by the chasing of a bug?  If so, please let me know exactly what information was (or would have been) the most helpful.
>
>For example, the starvation information might be more compactly represented in the information printed by print_cpu_stall_info().
>Unless the stack trace was quite valuable, it might be best to omit it.
>After all, RCU CPU stall warnings are already infamously heavy on the text output.

>Is it better to modify it like this?

I'm so sorry, the following modifications are problematic, I'll resend v2

Thanks
Zqiang

>diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index d7956c03edbd..37444ff00787 100644
>--- a/kernel/rcu/tree_stall.h
>+++ b/kernel/rcu/tree_stall.h
>@@ -465,27 +465,38 @@ static void print_cpu_stall_info(int cpu)
>               falsepositive ? " (false positive?)" : "");  }
>
>-static void rcuc_kthread_dump(struct rcu_data *rdp)
>+static void rcuc_kthread_dump(void)
> {
>        int cpu;
>        unsigned long j;
>+       unsigned long flags
>+       struct rcu_node *rnp;
>        struct task_struct *rcuc;
>+       struct rcu_data *rdp;
>
>-       rcuc = rdp->rcu_cpu_kthread_task;
>-       if (!rcuc)
>-               return;
>+       rcu_for_each_leaf_node(rnp) {
>+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
>+               for_each_leaf_node_possible_cpu(rnp, cpu)
>+                       if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
>+                               rdp = per_cpu_ptr(&rcu_data, cpu);
>+                               rcuc = rdp->rcu_cpu_kthread_task;
>+                               if (!rcuc)
>+                                       return;
>
>-       cpu = task_cpu(rcuc);
>-       if (cpu_is_offline(cpu) || idle_cpu(cpu))
>-               return;
>+                               cpu = task_cpu(rcuc);
>+                               if (cpu_is_offline(cpu) || idle_cpu(cpu))
>+                                       return;
>
>-       if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>-               return;
>+                               if (!rcu_is_rcuc_kthread_starving(rdp, &j))
>+                                       return;
>
>-       pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>-       sched_show_task(rcuc);
>-       if (!trigger_single_cpu_backtrace(cpu))
>-               dump_cpu_task(cpu);
>+                               pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
>+                               sched_show_task(rcuc);
>+                               if (!trigger_single_cpu_backtrace(cpu))
>+                                       dump_cpu_task(cpu);
>+                       }
>+               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>+       }
> }
>
>/* Complain about starvation of grace-period kthread.  */ @@ -595,6 +606,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 >              smp_processor_id(), (long)(jiffies - gps),
>               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
>        if (ndetected) {
>+               if (!use_softirq)
>+                       rcuc_kthread_dump();
>+
>                rcu_dump_cpu_stacks();
>
>                /* Complain about tasks blocking the grace period. */ @@ -660,7 +674,7 @@ static void print_cpu_stall(unsigned long gps)
>        rcu_check_gp_kthread_starvation();
>
>        if (!use_softirq)
>-               rcuc_kthread_dump(rdp);
>+               rcuc_kthread_dump();
>
>        rcu_dump_cpu_stacks();
>
>Thanks
>Zqiang


>  			}
>  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  	}
> @@ -471,6 +473,9 @@ static void rcuc_kthread_dump(struct rcu_data *rdp)
>  	unsigned long j;
>  	struct task_struct *rcuc;
>  
> +	if (use_softirq)
> +		return;
> +
>  	rcuc = rdp->rcu_cpu_kthread_task;
>  	if (!rcuc)

>The checks for use_softirq on the one hand and the checks for non-NULL
>rdp->rcu_cpu_kthread_task are a bit "interesting".  Not your fault or
>problem, of course!

>Yes the ' if (!use_softirq)' is redundant,  the if rcu_cpu_kthread_task  Is completely enough

>
>  		return;
> @@ -659,9 +664,6 @@ static void print_cpu_stall(unsigned long gps)
>  	rcu_check_gp_kthread_expired_fqs_timer();
>  	rcu_check_gp_kthread_starvation();
>  
> -	if (!use_softirq)
>
>In particular, I am wondering if this "if" is redundant.

>Yes the ' if (!use_softirq)' is also redundant

>
> -		rcuc_kthread_dump(rdp);
> -
>  	rcu_dump_cpu_stacks();
>  
>  	raw_spin_lock_irqsave_rcu_node(rnp, flags);
> --
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ