lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e6916a78-7872-442d-922d-31ea3920da4f@gmx.de>
Date: Thu, 1 Feb 2024 18:56:20 +0100
From: Helge Deller <deller@....de>
To: Tejun Heo <tj@...nel.org>
Cc: Helge Deller <deller@...nel.org>, Lai Jiangshan <jiangshanlai@...il.com>,
 linux-kernel@...r.kernel.org, linux-parisc@...r.kernel.org
Subject: Re: [PATCH][RFC] workqueue: Fix kernel panic on CPU hot-unplug

Hi Tejun,

On 2/1/24 17:54, Tejun Heo wrote:
> On Thu, Feb 01, 2024 at 05:41:10PM +0100, Helge Deller wrote:
>>> Hmm... I have a hard time imagining a scenario where some CPUs don't have
>>> pwq installed on wq->cpu_pwq. Can you please run `drgn
>>> tools/workqueue/wq_dump.py` before triggering the hotplug event and paste
>>> the output along with full dmesg?

Enabling CONFIG_DEBUG_INFO=y did the trick :-)


root@...ian:~# drgn --main-symbols -s ./vmlinux ./wq_dump.py 2>&1 | tee L
Affinity Scopes
===============
wq_unbound_cpumask=0000ffff

CPU
   nr_pods  16
   pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008 [4]=00000010 [5]=00000020 [6]=00000040 [7]=00000080 [8]=00000100 [9]=00000200 [10]=00000400 [11]=00000800 [12]=00001000 [13]=00002000 [14]=00004000 [15]=00008000
   pod_node [0]=0 [1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0 [8]=0 [9]=0 [10]=0 [11]=0 [12]=0 [13]=0 [14]=0 [15]=0
   cpu_pod  [0]=0 [1]=1

SMT
   nr_pods  16
   pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008 [4]=00000010 [5]=00000020 [6]=00000040 [7]=00000080 [8]=00000100 [9]=00000200 [10]=00000400 [11]=00000800 [12]=00001000 [13]=00002000 [14]=00004000 [15]=00008000
   pod_node [0]=0 [1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0 [8]=0 [9]=0 [10]=0 [11]=0 [12]=0 [13]=0 [14]=0 [15]=0
   cpu_pod  [0]=0 [1]=1

CACHE (default)
   nr_pods  1
   pod_cpus [0]=0000ffff
   pod_node [0]=0
   cpu_pod  [0]=0 [1]=0

NUMA
   nr_pods  1
   pod_cpus [0]=0000ffff
   pod_node [0]=0
   cpu_pod  [0]=0 [1]=0

SYSTEM
   nr_pods  1
   pod_cpus [0]=0000ffff
   pod_node [0]=-1
   cpu_pod  [0]=0 [1]=0

Worker Pools
============
pool[00] ref= 1 nice=  0 idle/workers=  4/  4 cpu=  0
pool[01] ref= 1 nice=-20 idle/workers=  2/  2 cpu=  0
pool[02] ref= 1 nice=  0 idle/workers=  4/  4 cpu=  1
pool[03] ref= 1 nice=-20 idle/workers=  2/  2 cpu=  1
pool[04] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  2
pool[05] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  2
pool[06] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  3
pool[07] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  3
pool[08] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  4
pool[09] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  4
pool[10] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  5
pool[11] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  5
pool[12] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  6
pool[13] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  6
pool[14] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  7
pool[15] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  7
pool[16] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  8
pool[17] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  8
pool[18] ref= 1 nice=  0 idle/workers=  0/  0 cpu=  9
pool[19] ref= 1 nice=-20 idle/workers=  0/  0 cpu=  9
pool[20] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 10
pool[21] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 10
pool[22] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 11
pool[23] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 11
pool[24] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 12
pool[25] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 12
pool[26] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 13
pool[27] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 13
pool[28] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 14
pool[29] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 14
pool[30] ref= 1 nice=  0 idle/workers=  0/  0 cpu= 15
pool[31] ref= 1 nice=-20 idle/workers=  0/  0 cpu= 15
pool[32] ref=28 nice=  0 idle/workers=  8/  8 cpus=0000ffff pod_cpus=0000ffff

Workqueue CPU -> pool
=====================
[    workqueue     \     type   CPU  0  1 dfl]
events                   percpu      0  2
events_highpri           percpu      1  3
events_long              percpu      0  2
events_unbound           unbound    32 32 32
events_freezable         percpu      0  2
events_power_efficient   percpu      0  2
events_freezable_power_  percpu      0  2
rcu_gp                   percpu      0  2
rcu_par_gp               percpu      0  2
slub_flushwq             percpu      0  2
netns                    ordered    32 32 32
mm_percpu_wq             percpu      0  2
inet_frag_wq             percpu      0  2
cgroup_destroy           percpu      0  2
cgroup_pidlist_destroy   percpu      0  2
cgwb_release             percpu      0  2
writeback                unbound    32 32 32
kintegrityd              percpu      1  3
kblockd                  percpu      1  3
blkcg_punt_bio           unbound    32 32 32
ata_sff                  percpu      0  2
usb_hub_wq               percpu      0  2
inode_switch_wbs         percpu      0  2
virtio-blk               percpu      0  2
scsi_tmf_0               ordered    32 32 32
psmouse-smbus            percpu      0  2
kpsmoused                ordered    32 32 32
sock_diag_events         percpu      0  2
kstrp                    ordered    32 32 32
ext4-rsv-conversion      ordered    32 32 32
root@...ian:~#
root@...ian:~# lscpu
Architecture:          parisc
   Byte Order:          Big Endian
CPU(s):                2
   On-line CPU(s) list: 0,1
Model name:            PA7300LC (PCX-L2)
   CPU family:          PA-RISC 1.1e
   Model:               9000/778/B160L - Merlin L2 160 (9000/778/B160L)
   Thread(s) per core:  1
   Core(s) per socket:  1
   Socket(s):           2
   BogoMIPS:            2446.13
root@...ian:~#
root@...ian:~# chcpu -d 1
[  261.926353] Backtrace:
[  261.928292]  [<10448744>] workqueue_offline_cpu+0x1d4/0x1dc
[  261.928292]  [<10429db4>] cpuhp_invoke_callback+0xf8/0x200
[  261.928292]  [<1042a1d0>] cpuhp_thread_fun+0xb8/0x164
[  261.928292]  [<10452970>] smpboot_thread_fn+0x284/0x288
[  261.928292]  [<1044d8f4>] kthread+0x12c/0x13c
[  261.928292]  [<1040201c>] ret_from_kernel_thread+0x1c/0x24
[  261.928292]
[  261.928292]
[  261.928292] Kernel Fault: Code=26 (Data memory access rights trap) at addr 00000000
[  261.928292] CPU: 1 PID: 21 Comm: cpuhp/1 Not tainted 6.8.0-rc1-32bit+ #1293
[  261.928292] Hardware name: 9000/778/B160L
[  261.928292]
[  261.928292]      YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
[  261.928292] PSW: 00000000000001101111111100001111 Not tainted
[  261.928292] r00-03  0006ff0f 11011540 10446d9c 11e00500
[  261.928292] r04-07  11c0b800 00000002 11c0d000 00000001
[  261.928292] r08-11  110194e4 11018f08 00000000 00000004
[  261.928292] r12-15  10c78800 00000612 f0028050 f0027fd8
[  261.928292] r16-19  fffffffc fee01180 f0027ed8 01735000
[  261.928292] r20-23  0000ffff 1249cc00 1249cc00 00000000
[  261.928292] r24-27  11c0c580 11c0d004 11c0d000 10ceb708
[  261.928292] r28-31  00000000 0000000e 11e00580 00000018
[  261.928292] sr00-03  00000000 00000000 00000000 000004be
[  261.928292] sr04-07  00000000 00000000 00000000 00000000
[  261.928292]
[  261.928292] IASQ: 00000000 00000000 IAOQ: 10446db4 10446db8
[  261.928292]  IIR: 0f80109c    ISR: 00000000  IOR: 00000000
[  261.928292]  CPU:        1   CR30: 11dd1710 CR31: 00000000
[  261.928292]  ORIG_R28: 00000612
[  261.928292]  IAOQ[0]: wq_update_pod+0x98/0x14c
[  261.928292]  IAOQ[1]: wq_update_pod+0x9c/0x14c
[  261.928292]  RP(r2): wq_update_pod+0x80/0x14c
[  261.928292] Backtrace:
[  261.928292]  [<10448744>] workqueue_offline_cpu+0x1d4/0x1dc
[  261.928292]  [<10429db4>] cpuhp_invoke_callback+0xf8/0x200
[  261.928292]  [<1042a1d0>] cpuhp_thread_fun+0xb8/0x164
[  261.928292]  [<10452970>] smpboot_thread_fn+0x284/0x288
[  261.928292]  [<1044d8f4>] kthread+0x12c/0x13c
[  261.928292]  [<1040201c>] ret_from_kernel_thread+0x1c/0x24
[  261.928292]
[  261.928292] Kernel panic - not syncing: Kernel Fault


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ