Message-ID: <CAEf4Bzan+yAzKcBG8VWFWOwR6PigRAjmQB8KrcRwheZnRaTEyQ@mail.gmail.com>
Date: Thu, 25 Sep 2025 11:39:18 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: "Paul E. McKenney" <paulmck@...nel.org>
Cc: rcu@...r.kernel.org, linux-kernel@...r.kernel.org, kernel-team@...a.com, 
	rostedt@...dmis.org, kernel test robot <oliver.sang@...el.com>, 
	Andrii Nakryiko <andrii@...nel.org>, Alexei Starovoitov <ast@...nel.org>, 
	Peter Zijlstra <peterz@...radead.org>, bpf@...r.kernel.org
Subject: Re: [PATCH 01/34] rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast

On Tue, Sep 23, 2025 at 7:22 AM Paul E. McKenney <paulmck@...nel.org> wrote:
>
> This commit saves more than 500 lines of RCU code by re-implementing
> RCU Tasks Trace in terms of SRCU-fast.  Follow-up work will remove
> more code that does not cause problems by its presence, but that is no
> longer required.
>
> This variant places smp_mb() in rcu_read_{,un}lock_trace(), which will
> be removed on common-case architectures in a later commit.
>
> [ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ]
>
> Signed-off-by: Paul E. McKenney <paulmck@...nel.org>
> Tested-by: kernel test robot <oliver.sang@...el.com>
> Cc: Andrii Nakryiko <andrii@...nel.org>
> Cc: Alexei Starovoitov <ast@...nel.org>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: <bpf@...r.kernel.org>
> ---
>  include/linux/rcupdate_trace.h | 107 ++++--
>  include/linux/sched.h          |   1 +
>  kernel/rcu/srcutiny.c          |  13 +-
>  kernel/rcu/tasks.h             | 617 +--------------------------------
>  4 files changed, 104 insertions(+), 634 deletions(-)
>

Makes sense to me overall, but I have a few questions below.

> diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
> index e6c44eb428ab63..3f46cbe6700038 100644
> --- a/include/linux/rcupdate_trace.h
> +++ b/include/linux/rcupdate_trace.h
> @@ -12,28 +12,28 @@
>  #include <linux/rcupdate.h>
>  #include <linux/cleanup.h>
>
> -extern struct lockdep_map rcu_trace_lock_map;
> +#ifdef CONFIG_TASKS_TRACE_RCU
> +extern struct srcu_struct rcu_tasks_trace_srcu_struct;
> +#endif // #ifdef CONFIG_TASKS_TRACE_RCU
>
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
>  static inline int rcu_read_lock_trace_held(void)
>  {
> -       return lock_is_held(&rcu_trace_lock_map);
> +       return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct);
>  }
>
> -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
>  static inline int rcu_read_lock_trace_held(void)
>  {
>         return 1;
>  }
>
> -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)

nit: // #else // #if... looks very unconventional

>
>  #ifdef CONFIG_TASKS_TRACE_RCU
>
> -void rcu_read_unlock_trace_special(struct task_struct *t);
> -
>  /**
>   * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
>   *
> @@ -50,12 +50,14 @@ static inline void rcu_read_lock_trace(void)
>  {
>         struct task_struct *t = current;
>
> -       WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
> -       barrier();
> -       if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
> -           t->trc_reader_special.b.need_mb)
> -               smp_mb(); // Pairs with update-side barriers
> -       rcu_lock_acquire(&rcu_trace_lock_map);
> +       if (t->trc_reader_nesting++) {
> +               // In case we interrupted a Tasks Trace RCU reader.
> +               rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);

Why is this the "try_lock" variant instead of the plain "lock_acquire"
one? Is there some lockdep special treatment for nested locking going on?
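
(For context, and just my reading of include/linux/rcupdate.h rather
than a claim about the intent here, the two helpers seem to differ only
in lockdep's trylock flag:

static inline void rcu_lock_acquire(struct lockdep_map *map)
{
	lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);	/* trylock = 0 */
}

static inline void rcu_try_lock_acquire(struct lockdep_map *map)
{
	lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_);	/* trylock = 1 */
}

So presumably the point is that a trylock-style acquisition makes
lockdep skip its deadlock checks, which would matter if the nested
rcu_read_lock_trace() can happen from hardirq/NMI context -- but it
would be nice to have that confirmed.)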

> +               return;
> +       }
> +       barrier();  // nesting before scp to protect against interrupt handler.
> +       t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
> +       smp_mb(); // Placeholder for more selective ordering
>  }
>
>  /**

[...]

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2b272382673d62..89d3646155525f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -939,6 +939,7 @@ struct task_struct {
>
>  #ifdef CONFIG_TASKS_TRACE_RCU
>         int                             trc_reader_nesting;
> +       struct srcu_ctr __percpu        *trc_reader_scp;
>         int                             trc_ipi_to_cpu;
>         union rcu_special               trc_reader_special;
>         struct list_head                trc_holdout_list;
> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
> index e3b64a5e0ec7e1..3450c3751ef7ad 100644
> --- a/kernel/rcu/srcutiny.c
> +++ b/kernel/rcu/srcutiny.c
> @@ -106,15 +106,15 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
>         newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
>         WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
>         preempt_enable();
> -       if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
> +       if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task() && !irqs_disabled())

This seems like something that should probably be done in a separate
patch, with an explanation of why it is needed?
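
If the motivation is the same one the new polling comment below gives
(some readers now finish in contexts where a wakeup could deadlock),
maybe spell it out in a comment right here. A rough sketch of what I
have in mind, assuming that really is the reason:

	/*
	 * Only wake the grace-period driver from a context where doing
	 * so is safe; otherwise rely on srcu_drive_gp()'s timeout-based
	 * polling to notice that srcu_lock_nesting[idx] has hit zero.
	 */
	if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task() && !irqs_disabled())
		swake_up_one(&ssp->srcu_wq);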

>                 swake_up_one(&ssp->srcu_wq);
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_unlock);
>
>  /*
>   * Workqueue handler to drive one grace period and invoke any callbacks
> - * that become ready as a result.  Single-CPU and !PREEMPTION operation
> - * means that we get away with murder on synchronization.  ;-)
> + * that become ready as a result.  Single-CPU operation and preemption
> + * disabling mean that we get away with murder on synchronization.  ;-)
>   */
>  void srcu_drive_gp(struct work_struct *wp)
>  {
> @@ -141,7 +141,12 @@ void srcu_drive_gp(struct work_struct *wp)
>         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
>         WRITE_ONCE(ssp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
>         preempt_enable();
> -       swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
> +       do {
> +               // Deadlock issues prevent __srcu_read_unlock() from
> +               // doing an unconditional wakeup, so polling is required.
> +               swait_event_timeout_exclusive(ssp->srcu_wq,
> +                                             !READ_ONCE(ssp->srcu_lock_nesting[idx]), HZ / 10);
> +       } while (READ_ONCE(ssp->srcu_lock_nesting[idx]));

Ditto: this is a generic SRCU change driven by the RCU Tasks Trace
transformation, so probably worth calling it out separately?
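
Also, just to check my understanding of how the two srcutiny changes
fit together -- a sketch of the sequence I'm picturing, not something
taken from the patch itself:

	/* Last reader exits its critical section with irqs disabled: */
	__srcu_read_unlock(ssp, idx);	/* nesting drops to 0, swake_up_one() is skipped */

	/* Meanwhile, srcu_drive_gp() no longer waits indefinitely: */
	do {
		/* Returns early if a reader does issue the wakeup;
		 * otherwise times out after HZ / 10 (~100ms) and re-checks. */
		swait_event_timeout_exclusive(ssp->srcu_wq,
					      !READ_ONCE(ssp->srcu_lock_nesting[idx]),
					      HZ / 10);
	} while (READ_ONCE(ssp->srcu_lock_nesting[idx]));

IOW, the worst case is roughly an extra 100ms of grace-period latency
when the last reader can't do the wakeup. Is that the intended
trade-off?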

>         preempt_disable();  // Needed for PREEMPT_LAZY
>         WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
>         WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);

[...]
