Message-Id: <20251125063617.671199-3-jiangshanlai@gmail.com>
Date: Tue, 25 Nov 2025 14:36:15 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>,
	ying chen <yc1082463@...il.com>,
	Lai Jiangshan <jiangshan.ljs@...group.com>,
	Lai Jiangshan <jiangshanlai@...il.com>
Subject: [PATCH V4 2/4] workqueue: Process rescuer work items one-by-one using a cursor

From: Lai Jiangshan <jiangshan.ljs@...group.com>

Previously, the rescuer scanned for all matching work items at once and
processed them within a single rescuer thread, which could cause one
blocking work item to stall all others.

Make the rescuer process work items one-by-one instead of slurping all
matches in a single pass.

Break the rescuer loop after finding and processing the first matching
work item, then restart the search to pick up the next. This gives
normal worker threads a chance to process the remaining items instead
of leaving them waiting on the rescuer's queue, and prevents a blocking
work item from stalling the rest once memory pressure is relieved.

Introduce a dummy cursor work item to avoid potentially O(N^2) rescans
of the work list. The cursor records the resume position for the next
scan, eliminating redundant traversals.
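
To illustrate the scheme outside the kernel, here is a minimal
userspace C sketch (hypothetical names, a hand-rolled list standing in
for the kernel's list.h): each scan assigns at most one matching item,
parks a dummy cursor node where that item used to be, and the next
scan resumes from the cursor instead of rescanning from the head.

#include <stdbool.h>
#include <stdio.h>

/* minimal doubly-linked list, standing in for the kernel's list.h */
struct node {
	struct node *prev, *next;
	int tag;		/* stands in for get_work_pwq() matching */
	bool is_cursor;		/* stands in for func == mayday_cursor_func */
};

static void list_init(struct node *n)
{
	n->prev = n->next = n;
}

static bool node_unlinked(const struct node *n)
{
	return n->next == n;	/* like list_empty() after list_del_init() */
}

static void insert_after(struct node *n, struct node *pos)
{
	n->prev = pos;
	n->next = pos->next;
	pos->next->prev = n;
	pos->next = n;
}

static void unlink_node(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);		/* like list_del_init(): marks "not parked" */
}

/*
 * Process at most one item matching @tag, resuming from @cursor when it
 * is parked in the list.  On success the cursor is re-parked where the
 * processed item used to be, so the next call resumes there in O(1).
 */
static bool rescue_one(struct node *head, struct node *cursor, int tag)
{
	struct node *n = node_unlinked(cursor) ? head->next : cursor->next;

	if (!node_unlinked(cursor))
		unlink_node(cursor);	/* re-parked below, or not needed */

	for (; n != head; n = n->next) {
		if (n->is_cursor || n->tag != tag)
			continue;
		insert_after(cursor, n);	/* park cursor right after @n */
		unlink_node(n);			/* "assign" the work item */
		printf("rescued item with tag %d\n", n->tag);
		return true;
	}
	return false;
}

int main(void)
{
	struct node head, cursor = { .is_cursor = true };
	struct node items[4] = { { .tag = 1 }, { .tag = 2 },
				 { .tag = 1 }, { .tag = 2 } };

	list_init(&head);
	list_init(&cursor);
	for (int i = 0; i < 4; i++)
		insert_after(&items[i], head.prev);	/* append at tail */

	while (rescue_one(&head, &cursor, 1))
		;	/* each call rescues one item tagged 1 */
	return 0;
}

As in the patch, the cursor is removed at the start of every scan and
only re-parked when an item is actually assigned, so an unsuccessful
scan leaves no stale cursor behind.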

Cc: ying chen <yc1082463@...il.com>
Reported-by: ying chen <yc1082463@...il.com>
Fixes: e22bee782b3b ("workqueue: implement concurrency managed dynamic worker pool")
Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
---
 kernel/workqueue.c | 55 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 02386e6eb409..06cd3d6ff7e1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -286,6 +286,7 @@ struct pool_workqueue {
 	struct list_head	pending_node;	/* LN: node on wq_node_nr_active->pending_pwqs */
 	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
 	struct list_head	mayday_node;	/* MD: node on wq->maydays */
+	struct work_struct	mayday_cursor;	/* L: cursor on pool->worklist */
 
 	u64			stats[PWQ_NR_STATS];
 
@@ -1120,6 +1121,12 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
 	return NULL;
 }
 
+static void mayday_cursor_func(struct work_struct *work)
+{
+	/* must never run; only marks a position in the worklist */
+	BUG();
+}
+
 /**
  * move_linked_works - move linked works to a list
  * @work: start of series of works to be scheduled
@@ -1182,6 +1189,16 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
 
 	lockdep_assert_held(&pool->lock);
 
+	/* The cursor work should not be processed */
+	if (unlikely(work->func == mayday_cursor_func)) {
+		/* only worker_thread() can possibly take this branch */
+		WARN_ON_ONCE(worker->rescue_wq);
+		if (nextp)
+			*nextp = list_next_entry(work, entry);
+		list_del_init(&work->entry);
+		return false;
+	}
+
 	/*
 	 * A single work shouldn't be executed concurrently by multiple workers.
 	 * __queue_work() ensures that @work doesn't jump to a different pool
@@ -3436,22 +3453,33 @@ static int worker_thread(void *__worker)
 static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescuer)
 {
 	struct worker_pool *pool = pwq->pool;
+	struct work_struct *cursor = &pwq->mayday_cursor;
 	struct work_struct *work, *n;
 
+	/* resume from the cursor if it is parked, otherwise start from the head */
+	if (list_empty(&cursor->entry)) {
+		work = list_first_entry(&pool->worklist, struct work_struct, entry);
+	} else {
+		work = list_next_entry(cursor, entry);
+		/* the cursor will be re-parked at a new position or is no longer needed */
+		list_del_init(&cursor->entry);
+	}
+
 	/* need rescue? */
 	if (!pwq->nr_active || !need_to_create_worker(pool))
 		return false;
 
-	/*
-	 * Slurp in all works issued via this workqueue and
-	 * process'em.
-	 */
-	list_for_each_entry_safe(work, n, &pool->worklist, entry) {
-		if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n))
+	/* find the next work item to rescue */
+	list_for_each_entry_safe_from(work, n, &pool->worklist, entry) {
+		if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) {
 			pwq->stats[PWQ_STAT_RESCUED]++;
+			/* park the cursor so the next search resumes here */
+			list_add_tail(&cursor->entry, &n->entry);
+			return true;
+		}
 	}
 
-	return !list_empty(&rescuer->scheduled);
+	return false;
 }
 
 /**
@@ -5135,6 +5163,19 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
 	kthread_init_work(&pwq->release_work, pwq_release_workfn);
+
+	/*
+	 * Set up the dummy cursor work with a valid function and get_work_pwq().
+	 *
+	 * The cursor work should only ever be on pwq->pool->worklist and
+	 * must not be treated as a processable work item.
+	 *
+	 * WORK_STRUCT_PENDING and WORK_STRUCT_INACTIVE just make it less
+	 * surprising to kernel debugging tools and reviewers.
+	 */
+	INIT_WORK(&pwq->mayday_cursor, mayday_cursor_func);
+	atomic_long_set(&pwq->mayday_cursor.data, (unsigned long)pwq |
+			WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | WORK_STRUCT_INACTIVE);
 }
 
 /* sync @pwq with the current state of its associated wq and link it */
-- 
2.19.1.6.gb485710b

