Message-ID: <ffbf9606-572e-49d6-9d90-5532a5376f23@nvidia.com>
Date: Thu, 10 Apr 2025 10:13:28 -0400
From: Joel Fernandes <joelagnelf@...dia.com>
To: linux-kernel@...r.kernel.org, "Paul E. McKenney" <paulmck@...nel.org>,
Frederic Weisbecker <frederic@...nel.org>,
Neeraj Upadhyay <neeraj.upadhyay@...nel.org>,
Josh Triplett <josh@...htriplett.org>, Boqun Feng <boqun.feng@...il.com>,
Uladzislau Rezki <urezki@...il.com>, Steven Rostedt <rostedt@...dmis.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Lai Jiangshan <jiangshanlai@...il.com>, Zqiang <qiang.zhang1211@...il.com>,
Davidlohr Bueso <dave@...olabs.net>
Cc: rcu@...r.kernel.org
Subject: Re: [PATCH v2] rcutorture: Perform more frequent testing of ->gpwrap
On 4/10/2025 9:54 AM, Joel Fernandes wrote:
> Currently, ->gpwrap is not tested at all (per my testing) because of the
> requirement of a large delta between a CPU's rdp->gp_seq and its node's
> rnp->gp_seq.
>
> This results in no testing of ->gpwrap being set. This patch adds, by
> default, 5 minutes of testing during which ->gpwrap is forced by lowering
> the delta between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this
> is configurable, including the time for which the lowered delta is active
> and the duration of the full testing cycle.
>
> By default, the first 25 minutes of a test use the current default
> behavior, that is, a delta of ULONG_MAX / 4. Then, for 5 minutes, we
> switch to the smaller delta, causing 1-2 wraps during those 5 minutes. I
> believe this is reasonable since it at least adds a little bit of testing
> for use cases where ->gpwrap is set.
>
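
[ Side note: since these are rcutorture module parameters (see the
  torture_param() additions below), the overflow-lag testing could be
  tuned on the kernel command line along these lines, the values shown
  being just the defaults added by this patch:

	rcutorture.gpwrap_lag_cycle_mins=30
	rcutorture.gpwrap_lag_active_mins=5
	rcutorture.gpwrap_lag_gps=8

  for example via --bootargs when running the rcutorture kvm.sh scripts. ]
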
> Signed-off-by: Joel Fernandes <joelagnelf@...dia.com>
> ---
> kernel/rcu/rcu.h | 4 +++
> kernel/rcu/rcutorture.c | 68 ++++++++++++++++++++++++++++++++++++++++-
> kernel/rcu/tree.c | 34 +++++++++++++++++++--
> kernel/rcu/tree.h | 1 +
> 4 files changed, 104 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index eed2951a4962..516b26024a37 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> unsigned long c_old,
> unsigned long c);
> void rcu_gp_set_torture_wait(int duration);
> +void rcu_set_gpwrap_lag(unsigned long lag);
> +int rcu_get_gpwrap_count(int cpu);
> #else
> static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
> {
> @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
> do { } while (0)
> #endif
> static inline void rcu_gp_set_torture_wait(int duration) { }
> +static inline void rcu_set_gpwrap_lag(unsigned long lag) { }
> +static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
> #endif
> unsigned long long rcutorture_gather_gp_seqs(void);
> void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index 895a27545ae1..c64cb5498401 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -118,6 +118,9 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
> torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
> torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
> torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
> +torture_param(int, gpwrap_lag_cycle_mins, 30, "Total cycle duration for ovf lag testing (in minutes)");
> +torture_param(int, gpwrap_lag_active_mins, 5, "Duration for which ovf lag is active within each cycle (in minutes)");
> +torture_param(int, gpwrap_lag_gps, 8, "Value to set for set_gpwrap_lag during an active testing period.");
> torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
> torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
> torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
> @@ -418,6 +421,8 @@ struct rcu_torture_ops {
> bool (*reader_blocked)(void);
> unsigned long long (*gather_gp_seqs)(void);
> void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len);
> + void (*set_gpwrap_lag)(unsigned long lag);
> + int (*get_gpwrap_count)(int cpu);
> long cbflood_max;
> int irq_capable;
> int can_boost;
> @@ -625,6 +630,8 @@ static struct rcu_torture_ops rcu_ops = {
> : NULL,
> .gather_gp_seqs = rcutorture_gather_gp_seqs,
> .format_gp_seqs = rcutorture_format_gp_seqs,
> + .set_gpwrap_lag = rcu_set_gpwrap_lag,
> + .get_gpwrap_count = rcu_get_gpwrap_count,
> .irq_capable = 1,
> .can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
> .extendables = RCUTORTURE_MAX_EXTEND,
> @@ -2629,6 +2636,7 @@ rcu_torture_stats_print(void)
> int i;
> long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
> + long n_gpwraps = 0;
> struct rcu_torture *rtcp;
> static unsigned long rtcv_snap = ULONG_MAX;
> static bool splatted;
> @@ -2639,6 +2647,7 @@ rcu_torture_stats_print(void)
> pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
> batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
> }
> + n_gpwraps += cur_ops->get_gpwrap_count(cpu);
> }
> for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
> if (pipesummary[i] != 0)
> @@ -2670,8 +2679,9 @@ rcu_torture_stats_print(void)
> data_race(n_barrier_attempts),
> data_race(n_rcu_torture_barrier_error));
> pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
> - pr_cont("nocb-toggles: %ld:%ld\n",
> + pr_cont("nocb-toggles: %ld:%ld ",
> atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
> + pr_cont("gpwraps: %ld\n", n_gpwraps);
>
> pr_alert("%s%s ", torture_type, TORTURE_FLAG);
> if (atomic_read(&n_rcu_torture_mberror) ||
> @@ -3842,6 +3852,55 @@ static int rcu_torture_preempt(void *unused)
>
> static enum cpuhp_state rcutor_hp;
>
> +static struct hrtimer gpwrap_lag_timer;
> +static bool gpwrap_lag_active;
> +
> +/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
> +static enum hrtimer_restart rcu_gpwrap_lag_timer(struct hrtimer *timer)
> +{
> + ktime_t next_delay;
> +
> + if (gpwrap_lag_active) {
> + pr_alert("rcu-torture: Disabling ovf lag (value=0)\n");
> + cur_ops->set_gpwrap_lag(0);
> + gpwrap_lag_active = false;
> + next_delay = ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0);
> + } else {
> + pr_alert("rcu-torture: Enabling ovf lag (value=%d)\n", gpwrap_lag_gps);
> + cur_ops->set_gpwrap_lag(gpwrap_lag_gps);
> + gpwrap_lag_active = true;
> + next_delay = ktime_set(gpwrap_lag_active_mins * 60, 0);
> + }
> +
> + if (torture_must_stop())
> + return HRTIMER_NORESTART;
> +
> + hrtimer_forward_now(timer, next_delay);
> + return HRTIMER_RESTART;
> +}
> +
> +static int rcu_gpwrap_lag_init(void)
> +{
> + if (gpwrap_lag_cycle_mins <= 0 || gpwrap_lag_active_mins <= 0) {
> + pr_alert("rcu-torture: lag timing parameters must be positive\n");
> + return -EINVAL;
> + }
> +
> + hrtimer_init(&gpwrap_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + gpwrap_lag_timer.function = rcu_gpwrap_lag_timer;
I should change this to hrtimer_setup() per the latest timer tree. Will
await any further comments before reposting.
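
In case it helps, the change I have in mind would be roughly the following
(assuming the hrtimer_setup() signature in the timers tree, which takes the
callback as its second argument), replacing the hrtimer_init() plus
.function assignment above:

	/* Initialize the timer and attach its callback in one step. */
	hrtimer_setup(&gpwrap_lag_timer, rcu_gpwrap_lag_timer,
		      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
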
Sorry, thanks,
- Joel