Message-Id: <1524450747-22778-13-git-send-email-paulmck@linux.vnet.ibm.com>
Date: Sun, 22 Apr 2018 19:32:18 -0700
From: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To: linux-kernel@...r.kernel.org
Cc: mingo@...nel.org, jiangshanlai@...il.com, dipankar@...ibm.com,
	akpm@...ux-foundation.org, mathieu.desnoyers@...icios.com,
	josh@...htriplett.org, tglx@...utronix.de, peterz@...radead.org,
	rostedt@...dmis.org, dhowells@...hat.com, edumazet@...gle.com,
	fweisbec@...il.com, oleg@...hat.com, joel.opensrc@...il.com,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCH tip/core/rcu 13/22] rcu: Exclude near-simultaneous RCU CPU stall warnings

There is a two-jiffy delay between the time that a CPU will self-report
an RCU CPU stall warning and the time that some other CPU will report a
warning on behalf of the first CPU.  This has worked well in the past,
but on busy systems, it is possible for the two warnings to overlap,
which makes interpreting them extremely difficult.  This commit
therefore uses a cmpxchg-based timing decision that allows only one
report in a given one-minute period (assuming default stall-warning
Kconfig parameters).  This approach will of course fail if you are
seeing minute-long vCPU preemption, but in that case the overlapping
RCU CPU stall warnings are the least of your worries.

Reported-by: Dmitry Vyukov <dvyukov@...gle.com>
Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c4db0e20b035..19d9475d74f2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1429,8 +1429,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	WRITE_ONCE(rsp->jiffies_stall,
-		   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	/*
@@ -1481,6 +1479,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 			sched_show_task(current);
 		}
 	}
+	/* Rewrite if needed in case of slow consoles. */
+	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
+		WRITE_ONCE(rsp->jiffies_stall,
+			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
 
 	rcu_check_gp_kthread_starvation(rsp);
 
@@ -1525,6 +1527,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	rcu_dump_cpu_stacks(rsp);
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	/* Rewrite if needed in case of slow consoles. */
 	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
 		WRITE_ONCE(rsp->jiffies_stall,
 			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -1548,6 +1551,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	unsigned long gpnum;
 	unsigned long gps;
 	unsigned long j;
+	unsigned long jn;
 	unsigned long js;
 	struct rcu_node *rnp;
 
@@ -1586,14 +1590,17 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	    ULONG_CMP_GE(gps, js))
 		return; /* No stall or GP completed since entering function. */
 	rnp = rdp->mynode;
+	jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
 	if (rcu_gp_in_progress(rsp) &&
-	    (READ_ONCE(rnp->qsmask) & rdp->grpmask)) {
+	    (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
+	    cmpxchg(&rsp->jiffies_stall, js, jn) == js) {
 
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(rsp);
 
 	} else if (rcu_gp_in_progress(rsp) &&
-		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
+		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
+		   cmpxchg(&rsp->jiffies_stall, js, jn) == js) {
 
 		/* They had a few time units to dump stack, so complain. */
 		print_other_cpu_stall(rsp, gpnum);
-- 
2.5.2
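
[Editor's note: the following is a minimal userspace sketch of the
cmpxchg gating idea described in the commit message above, not kernel
code.  It assumes C11 atomics in place of the kernel's cmpxchg(),
time() in place of jiffies, and a fixed 60-second window in place of
3 * rcu_jiffies_till_stall_check() + 3; the names STALL_WINDOW_SECONDS,
report_deadline, and try_claim_stall_report() are hypothetical.]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define STALL_WINDOW_SECONDS 60	/* stand-in for the stall-warning period */

/* Shared report deadline, analogous to rsp->jiffies_stall. */
static _Atomic unsigned long report_deadline;

/*
 * Return true for at most one caller per window: the caller that both
 * observed an expired deadline and won the compare-exchange advancing
 * it.  Every other caller either sees a fresh deadline or loses the
 * compare-exchange, and therefore stays quiet.
 */
static bool try_claim_stall_report(void)
{
	unsigned long now = (unsigned long)time(NULL);
	unsigned long seen = atomic_load(&report_deadline);

	if (now < seen)
		return false;	/* A report was issued recently; suppress. */

	/* Only the winner of this compare-exchange gets to report. */
	return atomic_compare_exchange_strong(&report_deadline, &seen,
					      now + STALL_WINDOW_SECONDS);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		if (try_claim_stall_report())
			printf("attempt %d: reporting stall\n", i);
		else
			printf("attempt %d: suppressed (recent report)\n", i);
	}
	return 0;
}

The point mirrored from the patch is that the deadline update and the
"may I report?" decision are a single atomic operation, so two CPUs
(here, two callers) racing into the stall-warning path cannot both
print within the same window.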