Message-ID: <Ytijki0fkkyKaD9u@boqun-archlinux>
Date: Wed, 20 Jul 2022 17:53:38 -0700
From: Boqun Feng <boqun.feng@...il.com>
To: "Paul E. McKenney" <paulmck@...nel.org>
Cc: rcu@...r.kernel.org, linux-kernel@...r.kernel.org,
kernel-team@...com, rostedt@...dmis.org,
Brian Foster <bfoster@...hat.com>,
Dave Chinner <david@...morbit.com>,
Al Viro <viro@...iv.linux.org.uk>, Ian Kent <raven@...maw.net>
Subject: Re: [PATCH rcu 04/12] rcu: Switch polled grace-period APIs to
->gp_seq_polled

Hi Paul,

On Mon, Jun 20, 2022 at 03:51:20PM -0700, Paul E. McKenney wrote:
> This commit switches the existing polled grace-period APIs to use a
> new ->gp_seq_polled counter in the rcu_state structure. An additional
> ->gp_seq_polled_snap counter in that same structure allows the normal
> grace period kthread to interact properly with the !SMP !PREEMPT fastpath
> through synchronize_rcu(). The first of the two to note the end of a
> given grace period will make knowledge of this transition available to
> the polled API.
>
> This commit is in preparation for polled expedited grace periods.
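[ Aside for readers new to the polled API that this counter backs: the
usual calling pattern looks something like the sketch below.
start_poll_synchronize_rcu() and poll_state_synchronize_rcu() are the
existing exported interfaces; the surrounding caller logic is made up
for illustration. ]

	unsigned long cookie;

	/* Snapshot RCU's state and kick off a grace period if needed. */
	cookie = start_poll_synchronize_rcu();

	/* ... do other useful work ... */

	/* Block only if that grace period has not already completed. */
	if (!poll_state_synchronize_rcu(cookie))
		synchronize_rcu();

[ cond_synchronize_rcu(cookie) packages up this same check-then-wait
pattern. ]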
>
> Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@redhat.com/
> Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing
> Cc: Brian Foster <bfoster@...hat.com>
> Cc: Dave Chinner <david@...morbit.com>
> Cc: Al Viro <viro@...iv.linux.org.uk>
> Cc: Ian Kent <raven@...maw.net>
> Signed-off-by: Paul E. McKenney <paulmck@...nel.org>
> ---
> kernel/rcu/tree.c | 90 +++++++++++++++++++++++++++++++++++++++++++++--
> kernel/rcu/tree.h | 2 ++
> 2 files changed, 89 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 46cfceea87847..637e8f9454573 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1775,6 +1775,78 @@ static void rcu_strict_gp_boundary(void *unused)
> invoke_rcu_core();
> }
>
> +// Has rcu_init() been invoked? This is used (for example) to determine
> +// whether spinlocks may be acquired safely.
> +static bool rcu_init_invoked(void)
> +{
> + return !!rcu_state.n_online_cpus;
> +}
> +
> +// Make the polled API aware of the beginning of a grace period.
> +static void rcu_poll_gp_seq_start(unsigned long *snap)
> +{
> + struct rcu_node *rnp = rcu_get_root();
> +
> + if (rcu_init_invoked())
> + raw_lockdep_assert_held_rcu_node(rnp);
> +
> + // If RCU was idle, note beginning of GP.
> + if (!rcu_seq_state(rcu_state.gp_seq_polled))
> + rcu_seq_start(&rcu_state.gp_seq_polled);
> +
> + // Either way, record current state.
> + *snap = rcu_state.gp_seq_polled;
> +}
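[ For anyone tracing the "RCU was idle" test above: rcu_seq_state()
keys off the two low-order state bits of the sequence counter, with
zero meaning idle. Roughly paraphrased from kernel/rcu/rcu.h of this
vintage; see the tree for the authoritative definitions. ]

	#define RCU_SEQ_CTR_SHIFT	2
	#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)

	/* Low-order bits encode the GP phase; zero means no GP in flight. */
	static inline int rcu_seq_state(unsigned long s)
	{
		return s & RCU_SEQ_STATE_MASK;
	}

	/* Mark a GP as started by bumping the state bits from 0 to 1. */
	static inline void rcu_seq_start(unsigned long *sp)
	{
		WRITE_ONCE(*sp, *sp + 1);
		smp_mb(); /* Ensure update-side operation after counter increment. */
		WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
	}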
> +
> +// Make the polled API aware of the end of a grace period.
> +static void rcu_poll_gp_seq_end(unsigned long *snap)
> +{
> + struct rcu_node *rnp = rcu_get_root();
> +
> + if (rcu_init_invoked())
> + raw_lockdep_assert_held_rcu_node(rnp);
> +
> +	// If the previously noted GP is still in effect, record the
> + // end of that GP. Either way, zero counter to avoid counter-wrap
> + // problems.
> + if (*snap && *snap == rcu_state.gp_seq_polled) {
> + rcu_seq_end(&rcu_state.gp_seq_polled);
> + rcu_state.gp_seq_polled_snap = 0;
> + } else {
> + *snap = 0;
> + }
> +}
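[ To make the snap handling concrete, a worked example with made-up
counter values, assuming the usual rcu_seq encoding with two low-order
state bits: ]

	unsigned long snap;

	/* Assume rcu_state.gp_seq_polled == 8, i.e. idle (state bits 00). */
	rcu_poll_gp_seq_start(&snap);	/* ->gp_seq_polled: 8 -> 9, snap = 9. */
	/* ... the grace period runs ... */
	rcu_poll_gp_seq_end(&snap);	/* snap == ->gp_seq_polled, so */
					/* rcu_seq_end(): 9 -> 12.     */

[ A cookie taken beforehand via rcu_seq_snap(&rcu_state.gp_seq_polled)
would have been 12, so poll_state_synchronize_rcu() on it now returns
true. ]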
> +
> +// Make the polled API aware of the beginning of a grace period, but
> +// where caller does not hold the root rcu_node structure's lock.
> +static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
> +{
> + struct rcu_node *rnp = rcu_get_root();
> +
> + if (rcu_init_invoked()) {
> + lockdep_assert_irqs_enabled();
> + raw_spin_lock_irq_rcu_node(rnp);
> + }
> + rcu_poll_gp_seq_start(snap);
> + if (rcu_init_invoked())
> + raw_spin_unlock_irq_rcu_node(rnp);
> +}
> +
> +// Make the polled API aware of the end of a grace period, but where
> +// caller does not hold the root rcu_node structure's lock.
> +static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
> +{
> + struct rcu_node *rnp = rcu_get_root();
> +
> + if (rcu_init_invoked()) {
> + lockdep_assert_irqs_enabled();
> + raw_spin_lock_irq_rcu_node(rnp);
> + }
> + rcu_poll_gp_seq_end(snap);
> + if (rcu_init_invoked())
> + raw_spin_unlock_irq_rcu_node(rnp);
> +}
> +
> /*
> * Initialize a new grace period. Return false if no grace period required.
> */
> @@ -1810,6 +1882,7 @@ static noinline_for_stack bool rcu_gp_init(void)
> rcu_seq_start(&rcu_state.gp_seq);
> ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
> trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
> + rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
> raw_spin_unlock_irq_rcu_node(rnp);
>
> /*
> @@ -2069,6 +2142,7 @@ static noinline void rcu_gp_cleanup(void)
> * safe for us to drop the lock in order to mark the grace
> * period as completed in all of the rcu_node structures.
> */
> + rcu_poll_gp_seq_end(&rcu_state.gp_seq_polled_snap);
> raw_spin_unlock_irq_rcu_node(rnp);
>
> /*
> @@ -3837,8 +3911,18 @@ void synchronize_rcu(void)
> lock_is_held(&rcu_lock_map) ||
> lock_is_held(&rcu_sched_lock_map),
> "Illegal synchronize_rcu() in RCU read-side critical section");
> - if (rcu_blocking_is_gp())
> + if (rcu_blocking_is_gp()) {
> + // Note well that this code runs with !PREEMPT && !SMP.
> + // In addition, all code that advances grace periods runs
> + // at process level. Therefore, this GP overlaps with other
> + // GPs only by being fully nested within them, which allows
> + // reuse of ->gp_seq_polled_snap.
> + rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
> + rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
> + if (rcu_init_invoked())
> + cond_resched_tasks_rcu_qs();
> return; // Context allows vacuous grace periods.
> + }
> if (rcu_gp_is_expedited())
> synchronize_rcu_expedited();
> else
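[ For context, the fastpath condition above comes from
rcu_blocking_is_gp(), roughly as follows in this era's kernel/rcu/tree.c
(paraphrased; check the tree for the exact body): with !PREEMPT and at
most one online CPU, merely blocking is itself a grace period. ]

	static int rcu_blocking_is_gp(void)
	{
		int ret;

		if (IS_ENABLED(CONFIG_PREEMPTION))
			return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
		might_sleep();  /* Check for RCU read-side critical section. */
		preempt_disable();
		/* With only one online CPU, blocking suffices as a full GP. */
		ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
		preempt_enable();
		return ret;
	}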
> @@ -3860,7 +3944,7 @@ unsigned long get_state_synchronize_rcu(void)
> * before the load from ->gp_seq.
> */
> smp_mb(); /* ^^^ */
> - return rcu_seq_snap(&rcu_state.gp_seq);
> + return rcu_seq_snap(&rcu_state.gp_seq_polled);
I happened to run into this. There is one use of
get_state_synchronize_rcu() in start_poll_synchronize_rcu(), where its
return value ("gp_seq") is passed to rcu_start_this_gp(). I don't think
this is quite right, because after this change rcu_state.gp_seq and
rcu_state.gp_seq_polled hold different values; in fact, ->gp_seq_polled
runs ahead of ->gp_seq by the number of synchronize_rcu() calls made
during early boot.
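For reference, start_poll_synchronize_rcu() currently looks roughly
like this (paraphrased from kernel/rcu/tree.c, trimmed for brevity):

	unsigned long start_poll_synchronize_rcu(void)
	{
		unsigned long flags;
		unsigned long gp_seq = get_state_synchronize_rcu();
		bool needwake;
		struct rcu_data *rdp;
		struct rcu_node *rnp;

		lockdep_assert_irqs_enabled();
		local_irq_save(flags);
		rdp = this_cpu_ptr(&rcu_data);
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp); // irqs already disabled.
		// gp_seq snapshots ->gp_seq_polled after this patch, yet is
		// compared against rnp->gp_seq inside rcu_start_this_gp():
		needwake = rcu_start_this_gp(rnp, rdp, gp_seq);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		if (needwake)
			rcu_gp_kthread_wake();
		return gp_seq;
	}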
Am I missing something here?

Regards,
Boqun
> }
> EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
>
> @@ -3925,7 +4009,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
> bool poll_state_synchronize_rcu(unsigned long oldstate)
> {
> if (oldstate == RCU_GET_STATE_COMPLETED ||
> - rcu_seq_done_exact(&rcu_state.gp_seq, oldstate)) {
> + rcu_seq_done_exact(&rcu_state.gp_seq_polled, oldstate)) {
> smp_mb(); /* Ensure GP ends before subsequent accesses. */
> return true;
> }
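[ For completeness, the rcu_seq_done_exact() used here, roughly as in
kernel/rcu/rcu.h of this era; the second test guards against sequence
counter wrap: ]

	static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
	{
		unsigned long cur_s = READ_ONCE(*sp);

		return ULONG_CMP_GE(cur_s, s) ||
		       ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1));
	}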
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 2ccf5845957df..9c853033f159d 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -323,6 +323,8 @@ struct rcu_state {
> short gp_state; /* GP kthread sleep state. */
> unsigned long gp_wake_time; /* Last GP kthread wake. */
> unsigned long gp_wake_seq; /* ->gp_seq at ^^^. */
> + unsigned long gp_seq_polled; /* GP seq for polled API. */
> + unsigned long gp_seq_polled_snap; /* ->gp_seq_polled at normal GP start. */
>
> /* End of fields guarded by root rcu_node's lock. */
>
> --
> 2.31.1.189.g2e36527f23
>