[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALm+0cV-s+gYDXKQV9dYWEr-ui6aJ6DZzvyNhW6H2T39WtPjWw@mail.gmail.com>
Date: Mon, 21 Oct 2024 19:01:02 +0800
From: Z qiang <qiang.zhang1211@...il.com>
To: Frederic Weisbecker <frederic@...nel.org>
Cc: paulmck@...nel.org, neeraj.upadhyay@...nel.org, joel@...lfernandes.org,
urezki@...il.com, boqun.feng@...il.com, rcu@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] rcu/nocb: Fix the WARN_ON_ONCE() in rcu_nocb_rdp_deoffload()
>
> >
> > Le Sun, Oct 20, 2024 at 08:51:19PM +0800, Zqiang a écrit :
> > > > Currently, running the rcutorture test with torture_type=rcu fwd_progress=8
> > > > n_barrier_cbs=8 nocbs_nthreads=8 nocbs_toggle=100 onoff_interval=60
> > > > test_boost=2 triggers the following warning:
> > >
> > > WARNING: CPU: 19 PID: 100 at kernel/rcu/tree_nocb.h:1061 rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > RIP: 0010:rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.537322] Call Trace:
> > > [18839.538006] <TASK>
> > > [18839.538596] ? __warn+0x7e/0x120
> > > [18839.539491] ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.540757] ? report_bug+0x18e/0x1a0
> > > [18839.541805] ? handle_bug+0x3d/0x70
> > > [18839.542837] ? exc_invalid_op+0x18/0x70
> > > [18839.543959] ? asm_exc_invalid_op+0x1a/0x20
> > > [18839.545165] ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.546547] rcu_nocb_cpu_deoffload+0x70/0xa0
> > > [18839.547814] rcu_nocb_toggle+0x136/0x1c0
> > > [18839.548960] ? __pfx_rcu_nocb_toggle+0x10/0x10
> > > [18839.550073] kthread+0xd1/0x100
> > > [18839.550958] ? __pfx_kthread+0x10/0x10
> > > [18839.552008] ret_from_fork+0x2f/0x50
> > > [18839.553002] ? __pfx_kthread+0x10/0x10
> > > [18839.553968] ret_from_fork_asm+0x1a/0x30
> > > [18839.555038] </TASK>
> > >
> > > CPU0 CPU2 CPU3
> > > //rcu_nocb_toggle //nocb_cb_wait //rcutorture
> > >
> > > // deoffload CPU1 // process CPU1's rdp
> > > rcu_barrier()
> > > rcu_segcblist_entrain()
> > > rcu_segcblist_add_len(1);
> > > // len == 2
> > > // enqueue barrier
> > > // callback to CPU1's
> > > // rdp->cblist
> > > rcu_do_batch()
> > > // invoke CPU1's rdp->cblist
> > > // callback
> > > rcu_barrier_callback()
> > > rcu_barrier()
> > > mutex_lock(&rcu_state.barrier_mutex);
> > > // still see len == 2
> > > // enqueue barrier callback
> > > // to CPU1's rdp->cblist
> > > rcu_segcblist_entrain()
> > > rcu_segcblist_add_len(1);
> > > // len == 3
> > > // decrement len
> > > rcu_segcblist_add_len(-2);
> > > kthread_parkme()
> > >
> > > // CPU1's rdp->cblist len == 1
> > > // Warn because there is
> > > // still a pending barrier
> > > // trigger warning
> > > WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> > > cpus_read_unlock();
> > >
> > > > // wait for CPU1 to come online
> > > > // and invoke the barrier callback on
> > > > // CPU1's rdp->cblist
> > > wait_for_completion(&rcu_state.barrier_completion);
> > > // deoffload CPU4
> > > cpus_read_lock()
> > > rcu_barrier()
> > > mutex_lock(&rcu_state.barrier_mutex);
> > > // block on barrier_mutex
> > > > // wait for rcu_barrier() on
> > > > // CPU3 to unlock barrier_mutex,
> > > > // but for CPU3 to unlock barrier_mutex
> > > > // it needs to wait for CPU1 to come online;
> > > > // CPU1 going online will block on cpus_write_lock
> > >
> > > > The above scenario not only triggers the WARN_ON_ONCE() but can also
> > > > deadlock. This commit therefore checks the rdp->cblist length
> > > > before invoking kthread_parkme(), so that kthread_parkme() is not
> > > > invoked until the length reaches zero.
> > >
> > > Signed-off-by: Zqiang <qiang.zhang1211@...il.com>
> > > ---
> > > kernel/rcu/tree_nocb.h | 8 +++++++-
> > > 1 file changed, 7 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > > index 8648233e1717..a2b0ebdefee3 100644
> > > --- a/kernel/rcu/tree_nocb.h
> > > +++ b/kernel/rcu/tree_nocb.h
> > > @@ -893,6 +893,12 @@ static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
> > > return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
> > > }
> > >
> > > +static inline bool nocb_cblist_empty(struct rcu_data *rdp)
> > > +{
> > > + return !(rcu_rdp_is_offloaded(rdp) &&
> >
> > > But the rdp has to be offloaded when nocb_cb_wait() is running, and that
> > > includes the times when it is parking and when it is unparking.
> >
> > > + WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist)));
> >
> > And like your scenario above shows, it's possible to reach here with
> > callbacks. So this check shouldn't be a warning at that point?
>
> Yes, the WARN_ON_ONCE() should be removed.
>
> >
> > > +}
> > > +
> > > /*
> > > * Invoke any ready callbacks from the corresponding no-CBs CPU,
> > > * then, if there are no more, wait for more to appear.
> > > @@ -907,7 +913,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> > >
> > > swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
> > > nocb_cb_wait_cond(rdp));
> > > - if (kthread_should_park()) {
> > > + if (kthread_should_park() && nocb_cblist_empty(rdp)) {
> >
> > What about this instead? If the second barrier is queued before
> > the final check to rcu_segcblist_ready_cbs() in nocb_cb_wait(), this
> > will be noticed and ->nocb_cb_sleep will remain false. If otherwise rcu_barrier()
> > is called after that final rcu_segcblist_ready_cbs() check, it will observe
> > the final decrement to zero and won't entrain the callback.
> >
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > index 16865475120b..0de07d44646c 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -891,7 +891,19 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> > swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
> > nocb_cb_wait_cond(rdp));
> > if (kthread_should_park()) {
> > - kthread_parkme();
> > + /*
> > + * kthread_park() must be preceded by an rcu_barrier().
> > + * But yet another rcu_barrier() might have sneaked in between
> > + * the barrier callback execution and the callbacks counter
> > + * decrement.
> > + */
> > + if (rdp->nocb_cb_sleep) {
>
> > For CPUs that were set as non-nocb during boot, the corresponding
> > rcuop kthread should park directly; otherwise
> > WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)) will be triggered.
>
> > Should the condition be like this?
> if(!rcu_rdp_is_offloaded(rdp) || rdp->nocb_cb_sleep)
>
>
How about this?
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 8648233e1717..14b70e662c9e 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -904,11 +904,27 @@ static void nocb_cb_wait(struct rcu_data *rdp)
unsigned long flags;
bool needwake_gp = false;
struct rcu_node *rnp = rdp->mynode;
+ bool need_parkme = false;
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
if (kthread_should_park()) {
- kthread_parkme();
+ /*
+ * kthread_park() must be preceded by an rcu_barrier().
+ * But yet another rcu_barrier() might have sneaked in between
+ * the barrier callback execution and the callbacks counter
+ * decrement.
+ */
+ if (!rcu_rdp_is_offloaded(rdp)) {
+ need_parkme = true;
+ } else if (rdp->nocb_cb_sleep) {
+ need_parkme = true;
+ rcu_nocb_lock_irqsave(rdp, flags);
+ WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ }
+ if (need_parkme)
+ kthread_parkme();
} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
> Thanks
> Zqiang
>
> > + rcu_nocb_lock_irqsave(rdp, flags);
> > + WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> > + rcu_nocb_unlock_irqrestore(rdp, flags);
> > +
> > + kthread_parkme();
> > + }
> > } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
> > WARN_ON(signal_pending(current));
> > trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
Powered by blists - more mailing lists