lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Mon, 20 May 2024 12:30:18 -1000
From: Tejun Heo <tj@...nel.org>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Jan Engelhardt <jengelh@...i.de>, Craig Small <csmall@....com.au>,
	Yafang Shao <laoar.shao@...il.com>, linux-kernel@...r.kernel.org,
	Lai Jiangshan <jiangshanlai@...il.com>
Subject: [PATCH workqueue/for-6.10-fixes] workqueue: Refactor worker ID
 formatting and make wq_worker_comm() use full ID string

Currently, worker ID formatting is open coded in create_worker(),
init_rescuer() and worker_thread() (for %WORKER_DIE case). The formatted ID
is saved into task->comm and wq_worker_comm() uses it as the base name to
append extra information to when generating the name to be shown as.

However, TASK_COMM_LEN is only 16 leading to badly truncated names for
rescuers. For example, the rescuer for the inet_frag_wq workqueue becomes:

  $ ps -ef | grep '[k]worker/R-inet'
  root         483       2  0 Apr26 ?        00:00:00 [kworker/R-inet_]

Even for non-rescue workers, it's easy to run over 15 characters on
moderately large machines.

Fit it by consolidating worker ID formatting into a new helper
format_worker_id() and calling it from wq_worker_comm() to obtain the
untruncated worker ID.

  $ ps -ef | grep '[k]worker/R-inet'
  root          60       2  0 12:10 ?        00:00:00 [kworker/R-inet_frag_wq]

Signed-off-by: Tejun Heo <tj@...nel.org>
Reported-by: Jan Engelhardt <jengelh@...i.de>
Suggested-by: Linus Torvalds <torvalds@...ux-foundation.org>
---
(cc'ing LKML and Lai)

On Mon, May 20, 2024 at 10:17:00AM -0700, Linus Torvalds wrote:
> Oh well. I suspect this would be trivial to fix. I think the
> get_kthread_comm() should use the full name, and only *then* attach
> the extra worker pool information if it exists..
> 
> Tejun?

Yeah, looks like even the unbound worker IDs are getting truncated on larger
machines. This patch should fix it. I'll apply it to wq/for-6.10-fixes soon.

> Also, independently of the WQ worker issue, I do think we could
> possibly expand TASK_COMM_LEN a bit more. 16 bytes is too short for
> user-level names too, and while it's seldom used for 'ps', it's
> visible in things like oops messages etc where it gets truncated.

That'd be great. ISTR this being discussed a while ago. Am I imagining that
we were going to raise this to 32?

Thanks.

 kernel/workqueue.c |   51 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,6 +125,7 @@ enum wq_internal_consts {
 	HIGHPRI_NICE_LEVEL	= MIN_NICE,
 
 	WQ_NAME_LEN		= 32,
+	WORKER_ID_LEN		= 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
 };
 
 /*
@@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(stru
 		complete(detach_completion);
 }
 
+static int format_worker_id(char *buf, size_t size, struct worker *worker,
+			    struct worker_pool *pool)
+{
+	if (worker->rescue_wq)
+		return scnprintf(buf, size, "kworker/R-%s",
+				 worker->rescue_wq->name);
+
+	if (pool) {
+		if (pool->cpu >= 0)
+			return scnprintf(buf, size, "kworker/%d:%d%s",
+					 pool->cpu, worker->id,
+					 pool->attrs->nice < 0  ? "H" : "");
+		else
+			return scnprintf(buf, size, "kworker/u%d:%d",
+					 pool->id, worker->id);
+	} else {
+		return scnprintf(buf, size, "kworker/dying");
+	}
+}
+
 /**
  * create_worker - create a new workqueue worker
  * @pool: pool the new worker will belong to
@@ -2758,7 +2779,6 @@ static struct worker *create_worker(stru
 {
 	struct worker *worker;
 	int id;
-	char id_buf[23];
 
 	/* ID is needed to determine kthread name */
 	id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
@@ -2777,17 +2797,14 @@ static struct worker *create_worker(stru
 	worker->id = id;
 
 	if (!(pool->flags & POOL_BH)) {
-		if (pool->cpu >= 0)
-			snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
-				 pool->attrs->nice < 0  ? "H" : "");
-		else
-			snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
+		char id_buf[WORKER_ID_LEN];
 
+		format_worker_id(id_buf, sizeof(id_buf), worker, pool);
 		worker->task = kthread_create_on_node(worker_thread, worker,
-					pool->node, "kworker/%s", id_buf);
+						      pool->node, "%s", id_buf);
 		if (IS_ERR(worker->task)) {
 			if (PTR_ERR(worker->task) == -EINTR) {
-				pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
+				pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n",
 				       id_buf);
 			} else {
 				pr_err_once("workqueue: Failed to create a worker thread: %pe",
@@ -3350,7 +3367,6 @@ woke_up:
 		raw_spin_unlock_irq(&pool->lock);
 		set_pf_worker(false);
 
-		set_task_comm(worker->task, "kworker/dying");
 		ida_free(&pool->worker_ida, worker->id);
 		worker_detach_from_pool(worker);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
@@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_a
 static int init_rescuer(struct workqueue_struct *wq)
 {
 	struct worker *rescuer;
+	char id_buf[WORKER_ID_LEN];
 	int ret;
 
 	if (!(wq->flags & WQ_MEM_RECLAIM))
@@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue
 	}
 
 	rescuer->rescue_wq = wq;
-	rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name);
+	format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL);
+
+	rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf);
 	if (IS_ERR(rescuer->task)) {
 		ret = PTR_ERR(rescuer->task);
 		pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
@@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void)
 /* used to show worker information through /proc/PID/{comm,stat,status} */
 void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
 {
-	int off;
-
-	/* always show the actual comm */
-	off = strscpy(buf, task->comm, size);
-	if (off < 0)
-		return;
-
 	/* stabilize PF_WQ_WORKER and worker pool association */
 	mutex_lock(&wq_pool_attach_mutex);
 
 	if (task->flags & PF_WQ_WORKER) {
 		struct worker *worker = kthread_data(task);
 		struct worker_pool *pool = worker->pool;
+		int off;
+
+		off = format_worker_id(buf, size, worker, pool);
 
 		if (pool) {
 			raw_spin_lock_irq(&pool->lock);
@@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t si
 			}
 			raw_spin_unlock_irq(&pool->lock);
 		}
+	} else {
+		strscpy(buf, task->comm, size);
 	}
 
 	mutex_unlock(&wq_pool_attach_mutex);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ