Message-ID: <CAEf4Bzan+yAzKcBG8VWFWOwR6PigRAjmQB8KrcRwheZnRaTEyQ@mail.gmail.com>
Date: Thu, 25 Sep 2025 11:39:18 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: "Paul E. McKenney" <paulmck@...nel.org>
Cc: rcu@...r.kernel.org, linux-kernel@...r.kernel.org, kernel-team@...a.com,
rostedt@...dmis.org, kernel test robot <oliver.sang@...el.com>,
Andrii Nakryiko <andrii@...nel.org>, Alexei Starovoitov <ast@...nel.org>,
Peter Zijlstra <peterz@...radead.org>, bpf@...r.kernel.org
Subject: Re: [PATCH 01/34] rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast
On Tue, Sep 23, 2025 at 7:22 AM Paul E. McKenney <paulmck@...nel.org> wrote:
>
> This commit saves more than 500 lines of RCU code by re-implementing
> RCU Tasks Trace in terms of SRCU-fast. Follow-up work will remove
> more code that does not cause problems by its presence, but that is no
> longer required.
>
> This variant places smp_mb() in rcu_read_{,un}lock_trace(), which will
> be removed on common-case architectures in a later commit.
>
> [ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ]
>
> Signed-off-by: Paul E. McKenney <paulmck@...nel.org>
> Tested-by: kernel test robot <oliver.sang@...el.com>
> Cc: Andrii Nakryiko <andrii@...nel.org>
> Cc: Alexei Starovoitov <ast@...nel.org>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: <bpf@...r.kernel.org>
> ---
> include/linux/rcupdate_trace.h | 107 ++++--
> include/linux/sched.h | 1 +
> kernel/rcu/srcutiny.c | 13 +-
> kernel/rcu/tasks.h | 617 +--------------------------------
> 4 files changed, 104 insertions(+), 634 deletions(-)
>
This makes sense to me overall, but I had a few questions below.
> diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
> index e6c44eb428ab63..3f46cbe6700038 100644
> --- a/include/linux/rcupdate_trace.h
> +++ b/include/linux/rcupdate_trace.h
> @@ -12,28 +12,28 @@
> #include <linux/rcupdate.h>
> #include <linux/cleanup.h>
>
> -extern struct lockdep_map rcu_trace_lock_map;
> +#ifdef CONFIG_TASKS_TRACE_RCU
> +extern struct srcu_struct rcu_tasks_trace_srcu_struct;
> +#endif // #ifdef CONFIG_TASKS_TRACE_RCU
>
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
> static inline int rcu_read_lock_trace_held(void)
> {
> - return lock_is_held(&rcu_trace_lock_map);
> + return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct);
> }
>
> -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
> static inline int rcu_read_lock_trace_held(void)
> {
> return 1;
> }
>
> -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
nit: the "// #else // #if ..." comment trailer looks very unconventional
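For comparison, the usual trailer form elsewhere in the tree is just the
guard expression in a block comment, something like:

  #endif /* CONFIG_DEBUG_LOCK_ALLOC && CONFIG_TASKS_TRACE_RCU */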
>
> #ifdef CONFIG_TASKS_TRACE_RCU
>
> -void rcu_read_unlock_trace_special(struct task_struct *t);
> -
> /**
> * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
> *
> @@ -50,12 +50,14 @@ static inline void rcu_read_lock_trace(void)
> {
> struct task_struct *t = current;
>
> - WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
> - barrier();
> - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
> - t->trc_reader_special.b.need_mb)
> - smp_mb(); // Pairs with update-side barriers
> - rcu_lock_acquire(&rcu_trace_lock_map);
> + if (t->trc_reader_nesting++) {
> + // In case we interrupted a Tasks Trace RCU reader.
> + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
Why is this a "try_lock" variant instead of the no-try "lock_acquire"
one? Is there some special lockdep treatment for nested locking?
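For reference, if I'm reading include/linux/rcupdate.h right, the only
difference between the two helpers is the trylock flag passed to lockdep,
which, as I understand it, tells lockdep the acquisition cannot block and
so cannot deadlock:

  static inline void rcu_lock_acquire(struct lockdep_map *map)
  {
          lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);
  }

  static inline void rcu_try_lock_acquire(struct lockdep_map *map)
  {
          /* trylock = 1: lockdep records the hold but skips deadlock checks */
          lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_);
  }

If the intent is just to keep lockdep quiet about the recursive
acquisition here, a short comment saying so would help.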
> + return;
> + }
> + barrier(); // nesting before scp to protect against interrupt handler.
> + t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
> + smp_mb(); // Placeholder for more selective ordering
> }
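
Also, checking my understanding of the barrier() comment: the nesting
increment has to be compiler-ordered before the srcu_read_lock_fast()
call so that an interrupt arriving between the two sees a nonzero count
and takes the early-return path above instead of clobbering
trc_reader_scp, i.e. something like:

  t->trc_reader_nesting = 1;                      // must be visible first
  <interrupt>
    rcu_read_lock_trace();                        // nesting != 0, so only ++
    ...
    rcu_read_unlock_trace();                      // presumably just -- back down
  </interrupt>
  t->trc_reader_scp = srcu_read_lock_fast(...);   // irq did not touch scp

Is that the scenario?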
>
> /**
[...]
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2b272382673d62..89d3646155525f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -939,6 +939,7 @@ struct task_struct {
>
> #ifdef CONFIG_TASKS_TRACE_RCU
> int trc_reader_nesting;
> + struct srcu_ctr __percpu *trc_reader_scp;
> int trc_ipi_to_cpu;
> union rcu_special trc_reader_special;
> struct list_head trc_holdout_list;
> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
> index e3b64a5e0ec7e1..3450c3751ef7ad 100644
> --- a/kernel/rcu/srcutiny.c
> +++ b/kernel/rcu/srcutiny.c
> @@ -106,15 +106,15 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
> newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
> WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
> preempt_enable();
> - if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
> + if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task() && !irqs_disabled())
This seems like something that should probably be done in a separate
patch, with an explanation of why it is needed?
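(My guess at the deadlock in question, which is exactly why a dedicated
commit log would help: a reader exiting its critical section with irqs
disabled, e.g. under scheduler locks in some tracing path, would
otherwise end up with

  __srcu_read_unlock()
    -> swake_up_one()
      -> try_to_wake_up()   /* may need pi/rq locks the caller already holds */

but I'm speculating.)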
> swake_up_one(&ssp->srcu_wq);
> }
> EXPORT_SYMBOL_GPL(__srcu_read_unlock);
>
> /*
> * Workqueue handler to drive one grace period and invoke any callbacks
> - * that become ready as a result. Single-CPU and !PREEMPTION operation
> - * means that we get away with murder on synchronization. ;-)
> + * that become ready as a result. Single-CPU operation and preemption
> + * disabling mean that we get away with murder on synchronization. ;-)
> */
> void srcu_drive_gp(struct work_struct *wp)
> {
> @@ -141,7 +141,12 @@ void srcu_drive_gp(struct work_struct *wp)
> WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
> WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
> preempt_enable();
> - swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
> + do {
> + // Deadlock issues prevent __srcu_read_unlock() from
> + // doing an unconditional wakeup, so polling is required.
> + swait_event_timeout_exclusive(ssp->srcu_wq,
> + !READ_ONCE(ssp->srcu_lock_nesting[idx]), HZ / 10);
> + } while (READ_ONCE(ssp->srcu_lock_nesting[idx]));
Ditto: this is a generic SRCU change driven by the RCU Tasks Trace
conversion, so probably worth splitting out and explaining separately?
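If I follow, the timeout here is what compensates for the wakeups that
__srcu_read_unlock() now skips: a reader finishing with irqs disabled
never calls swake_up_one(), so the worst case becomes an extra ~100ms
(HZ / 10) of polling rather than a missed-wakeup hang:

  reader (irqs disabled)              srcu_drive_gp()
  ----------------------              ---------------
  srcu_lock_nesting[idx] -> 0         swait_event_timeout_exclusive(...)
  wakeup skipped                      (no wakeup ever arrives)
                                      times out after HZ / 10, re-checks
                                      condition now true, loop exits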
> preempt_disable(); // Needed for PREEMPT_LAZY
> WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
> WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
[...]