[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211202003858.3129628-16-paulmck@kernel.org>
Date: Wed, 1 Dec 2021 16:38:56 -0800
From: "Paul E. McKenney" <paulmck@...nel.org>
To: rcu@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, kernel-team@...com, mingo@...nel.org,
jiangshanlai@...il.com, akpm@...ux-foundation.org,
mathieu.desnoyers@...icios.com, josh@...htriplett.org,
tglx@...utronix.de, peterz@...radead.org, rostedt@...dmis.org,
dhowells@...hat.com, edumazet@...gle.com, fweisbec@...il.com,
oleg@...hat.com, joel@...lfernandes.org,
"Paul E. McKenney" <paulmck@...nel.org>, Martin Lau <kafai@...com>,
Neeraj Upadhyay <neeraj.iitr10@...il.com>
Subject: [PATCH rcu 16/18] rcu-tasks: Use more callback queues if contention encountered
The rcupdate.rcu_task_enqueue_lim module parameter allows system
administrators to tune the number of callback queues used by the RCU
Tasks flavors. However if callback storms are infrequent, it would
be better to operate with a single queue on a given system unless and
until that system actually needed more queues. Systems not needing
more queues can then avoid the overhead of checking the extra queues
and especially avoid the overhead of fanning workqueue handlers out to
all CPUs to invoke callbacks.
This commit therefore switches to using all the CPUs' callback queues if
call_rcu_tasks_generic() encounters too much lock contention. The amount
of lock contention to tolerate defaults to 100 contended lock acquisitions
per jiffy, and can be adjusted using the new rcupdate.rcu_task_contend_lim
module parameter.
Such switching is undertaken only if the rcupdate.rcu_task_enqueue_lim
module parameter is negative, which is its default value (-1).
This allows savvy systems administrators to set the number of queues
to some known good value and to not have to worry about the kernel doing
any second guessing.
[ paulmck: Apply feedback from Guillaume Tucker and kernelci. ]
Reported-by: Martin Lau <kafai@...com>
Cc: Neeraj Upadhyay <neeraj.iitr10@...il.com>
Signed-off-by: Paul E. McKenney <paulmck@...nel.org>
---
.../admin-guide/kernel-parameters.txt | 8 ++++++
kernel/rcu/tasks.h | 27 ++++++++++++++++---
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9b09fc5dfe665..089f4c5f8225b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4805,6 +4805,14 @@
period to instead use normal non-expedited
grace-period processing.
+ rcupdate.rcu_task_contend_lim= [KNL]
+ Set the minimum number of callback-queuing-time
+ lock-contention events per jiffy required to
+ cause the RCU Tasks flavors to switch to per-CPU
+ callback queuing. This switching only occurs
+ when rcupdate.rcu_task_enqueue_lim is set to
+ the default value of -1.
+
rcupdate.rcu_task_enqueue_lim= [KNL]
Set the number of callback queues to use for the
RCU Tasks family of RCU flavors. The default
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 652f51eec5f34..1695da0f6985e 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -142,6 +142,10 @@ module_param(rcu_task_stall_timeout, int, 0644);
static int rcu_task_enqueue_lim __read_mostly = -1;
module_param(rcu_task_enqueue_lim, int, 0444);
+static bool rcu_task_cb_adjust;
+static int rcu_task_contend_lim __read_mostly = 100;
+module_param(rcu_task_contend_lim, int, 0444);
+
/* RCU tasks grace-period state for debugging. */
#define RTGS_INIT 0
#define RTGS_WAIT_WAIT_CBS 1
@@ -207,10 +211,13 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
int lim;
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
- if (rcu_task_enqueue_lim < 0)
- rcu_task_enqueue_lim = nr_cpu_ids;
- else if (rcu_task_enqueue_lim == 0)
+ if (rcu_task_enqueue_lim < 0) {
+ rcu_task_enqueue_lim = 1;
+ rcu_task_cb_adjust = true;
+ pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
+ } else if (rcu_task_enqueue_lim == 0) {
rcu_task_enqueue_lim = 1;
+ }
lim = rcu_task_enqueue_lim;
if (lim > nr_cpu_ids)
@@ -251,6 +258,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
{
unsigned long flags;
unsigned long j;
+ bool needadjust = false;
bool needwake;
struct rcu_tasks_percpu *rtpcp;
@@ -266,7 +274,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
rtpcp->rtp_jiffies = j;
rtpcp->rtp_n_lock_retries = 0;
}
- rtpcp->rtp_n_lock_retries++;
+ if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim &&
+ READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
+ needadjust = true; // Defer adjustment to avoid deadlock.
}
if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
@@ -276,6 +286,15 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
needwake = rcu_segcblist_empty(&rtpcp->cblist);
rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ if (unlikely(needadjust)) {
+ raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+ if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
+ WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+ smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
+ pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
+ }
+ raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+ }
/* We can't create the thread unless interrupts are enabled. */
if (needwake && READ_ONCE(rtp->kthread_ptr)) {
irq_work_queue(&rtpcp->rtp_irq_work);
--
2.31.1.189.g2e36527f23
Powered by blists - more mailing lists