[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20251121145720.342467-6-jiangshanlai@gmail.com>
Date: Fri, 21 Nov 2025 22:57:18 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>,
ying chen <yc1082463@...il.com>,
Lai Jiangshan <jiangshan.ljs@...group.com>,
Lai Jiangshan <jiangshanlai@...il.com>
Subject: [PATCH V3 5/7] workqueue: Process rescuer work items one-by-one using a cursor
From: Lai Jiangshan <jiangshan.ljs@...group.com>
Previously, the rescuer scanned for all matching work items at once and
processed them within a single rescuer thread, which could cause one
blocking work item to stall all others.
Make the rescuer process work items one-by-one instead of slurping all
matches in a single pass.
Break the rescuer loop after finding and processing the first matching
work item, then restart the search to pick up the next. This gives
normal worker threads a chance to pick up the other items, giving them
the opportunity to be processed instead of waiting on the rescuer's
queue, and prevents a blocking work item from stalling the rest once
memory pressure is relieved.
Introduce a dummy cursor work item to avoid potentially O(N^2)
rescans of the work list. The marker records the resume position for
the next scan, eliminating redundant traversals.
Cc: ying chen <yc1082463@...il.com>
Reported-by: ying chen <yc1082463@...il.com>
Fixes: e22bee782b3b ("workqueue: implement concurrency managed dynamic worker pool")
Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
---
kernel/workqueue.c | 56 ++++++++++++++++++++++++++++++++++++++++------
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3032235a131e..49dce50ff647 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -286,6 +286,7 @@ struct pool_workqueue {
struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
+ struct work_struct mayday_cursor; /* L: cursor on pool->worklist */
u64 stats[PWQ_NR_STATS];
@@ -1126,6 +1127,12 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
return NULL;
}
+static void mayday_cursor_func(struct work_struct *work)
+{
+ /* should not be processed, only for marking position */
+ BUG();
+}
+
/**
* move_linked_works - move linked works to a list
* @work: start of series of works to be scheduled
@@ -1188,6 +1195,16 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
lockdep_assert_held(&pool->lock);
+ /* The cursor work should not be processed */
+ if (unlikely(work->func == mayday_cursor_func)) {
+ /* only worker_thread() can possibly take this branch */
+ WARN_ON_ONCE(worker->rescue_wq);
+ if (nextp)
+ *nextp = list_next_entry(work, entry);
+ list_del_init(&work->entry);
+ return false;
+ }
+
/*
* A single work shouldn't be executed concurrently by multiple workers.
* __queue_work() ensures that @work doesn't jump to a different pool
@@ -3442,22 +3459,33 @@ static int worker_thread(void *__worker)
static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescuer)
{
struct worker_pool *pool = pwq->pool;
+ struct work_struct *cursor = &pwq->mayday_cursor;
struct work_struct *work, *n;
+ /* from where to search */
+ if (list_empty(&cursor->entry)) {
+ work = list_first_entry(&pool->worklist, struct work_struct, entry);
+ } else {
+ work = list_next_entry(cursor, entry);
+ /* It will be at a new position or not need cursor anymore */
+ list_del_init(&cursor->entry);
+ }
+
/* need rescue? */
if (!pwq->nr_active || !need_to_create_worker(pool))
return false;
- /*
- * Slurp in all works issued via this workqueue and
- * process'em.
- */
- list_for_each_entry_safe(work, n, &pool->worklist, entry) {
- if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n))
+ /* try to assign a work to rescue */
+ list_for_each_entry_safe_from(work, n, &pool->worklist, entry) {
+ if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) {
pwq->stats[PWQ_STAT_RESCUED]++;
+ /* put the cursor for next search */
+ list_add_tail(&cursor->entry, &n->entry);
+ return true;
+ }
}
- return !list_empty(&rescuer->scheduled);
+ return false;
}
/**
@@ -5141,6 +5169,20 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
kthread_init_work(&pwq->release_work, pwq_release_workfn);
+
+ /*
+ * Set the dummy cursor work with valid function and get_work_pwq().
+ *
+ * The cursor work should only be in the pwq->pool->worklist, and
+ * should never be queued, processed, flushed, cancelled or even examined
+ * as a work item.
+ *
+ * WORK_STRUCT_PENDING and WORK_STRUCT_INACTIVE just make it less
+ * surprising for kernel debugging tools and reviewers.
+ */
+ INIT_WORK(&pwq->mayday_cursor, mayday_cursor_func);
+ atomic_long_set(&pwq->mayday_cursor.data, (unsigned long)pwq |
+ WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | WORK_STRUCT_INACTIVE);
}
/* sync @pwq with the current state of its associated wq and link it */
--
2.19.1.6.gb485710b
Powered by blists - more mailing lists