linux-kernel - [PATCH] workqueue: fix invalid cpu in kick_pool

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [thread-next>] [day] [month] [year] [list]

Message-Id: <20231120121623.119780-1-alexyonghe@tencent.com>
Date:   Mon, 20 Nov 2023 20:16:23 +0800
From:   Yong He <zhuangel570@...il.com>
To:     tj@...nel.org, jiangshanlai@...il.com, linux-kernel@...r.kernel.org
Subject: [PATCH] workqueue: fix invalid cpu in kick_pool

From: Yong He <alexyonghe@...cent.com>

Unbound workqueues now support non-strict affinity scopes since
commit 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for
unbound workqueues"), which allows a worker task to run outside its pod
to gain better performance; kick_pool() is then used to migrate the worker
task back to the pod.

With an incorrect unbound workqueue configuration, this may cause a kernel
panic, because cpumask_any_distribute() does not always return a valid CPU.
For example, if 'isolcpus' and 'workqueue.unbound_cpus' are set to the same
set of CPUs, @pool->attrs->__pod_cpumask becomes an empty set; the
out-of-range value returned by cpumask_any_distribute() is then assigned to
p->wake_cpu, which triggers a panic like this:

 BUG: unable to handle page fault for address: ffffffff8305e9c0
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 2c31067 P4D 2c31067 PUD 2c32063 PMD 10a18d063 PTE 800ffffffcfa1062
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 39 PID: 1 Comm: systemd Not tainted 6.6.1-tlinux4-0011.1 #2
 Hardware name: Cloud Hypervisor cloud-hypervisor, BIOS 0
 RIP: 0010:available_idle_cpu+0x21/0x60
 RSP: 0018:ffffc90000013828 EFLAGS: 00010082
 RAX: 0000000000000000 RBX: 0000000000000028 RCX: 0000000000000008
 RDX: ffffffff8305e040 RSI: 0000000000000028 RDI: 0000000000000028
 RBP: ffffc90000013828 R08: 0000000000000027 R09: 00000000000000b0
 R10: 0000000000000000 R11: ffffffff82c64348 R12: 0000000000000028
 R13: ffff888100928000 R14: 0000000000000028 R15: 0000000000000000
 FS:  00007f0d6a5d39c0(0000) GS:ffff888c8ddc0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ffffffff8305e9c0 CR3: 0000000100074002 CR4: 0000000000770ee0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Call Trace:
  <TASK>
  select_idle_sibling+0x79/0xaf0
  select_task_rq_fair+0x1cb/0x7b0
  try_to_wake_up+0x29c/0x5c0
  wake_up_process+0x19/0x20
  kick_pool+0x5e/0xb0
  __queue_work+0x119/0x430
  queue_work_on+0x29/0x30
  driver_deferred_probe_trigger.part.15+0x8b/0x90
  driver_bound+0x8b/0xe0
  really_probe+0x2e6/0x3b0
  __driver_probe_device+0x85/0x170
  driver_probe_device+0x24/0x90
  __driver_attach+0xd5/0x170
  bus_for_each_dev+0x7a/0xd0
  driver_attach+0x22/0x30
  bus_add_driver+0x17c/0x230
  driver_register+0x5e/0x110
  ? 0xffffffffa021b000
  register_virtio_driver+0x24/0x40
  register_virtio_driver+0x24/0x40
  virtio_rng_driver_init+0x19/0x1000 [virtio_rng]
  do_one_initcall+0x54/0x220
  do_init_module+0x68/0x250
  load_module+0x1f21/0x2080
  init_module_from_file+0x99/0xd0
  idempotent_init_module+0x195/0x250
  __x64_sys_finit_module+0x68/0xc0
  do_syscall_64+0x40/0x90
  entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues")
Signed-off-by: Yong He <alexyonghe@...cent.com>
---
 kernel/workqueue.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6e578f576a6f..0d20feded4e2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1106,6 +1106,7 @@ static bool kick_pool(struct worker_pool *pool)
 {
 	struct worker *worker = first_idle_worker(pool);
 	struct task_struct *p;
+	int cpu;
 
 	lockdep_assert_held(&pool->lock);
 
@@ -1133,10 +1134,13 @@ static bool kick_pool(struct worker_pool *pool)
 	 */
 	if (!pool->attrs->affn_strict &&
 	    !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) {
-		struct work_struct *work = list_first_entry(&pool->worklist,
-						struct work_struct, entry);
-		p->wake_cpu = cpumask_any_distribute(pool->attrs->__pod_cpumask);
-		get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++;
+		cpu = cpumask_any_distribute(pool->attrs->__pod_cpumask);
+		if (cpu < nr_cpu_ids) {
+			struct work_struct *work = list_first_entry(&pool->worklist,
+							struct work_struct, entry);
+			p->wake_cpu = cpu;
+			get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++;
+		}
 	}
 #endif
 	wake_up_process(p);
-- 
2.31.1