lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20071220143242.GB6953@skywalker>
Date:	Thu, 20 Dec 2007 20:02:42 +0530
From:	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>
To:	ext4 development <linux-ext4@...r.kernel.org>
Cc:	Theodore Tso <tytso@....edu>, Andreas Dilger <adilger@....com>
Subject: BUG: soft lockup - CPU#0 stuck for 11s! [fsstress:5534]

I am seeing this with the patch queue. I can reproduce this on x86 and
powerpc. I see the file system full when this happens. The same happens even
without delalloc enabled.


Any idea what i should try now. It is not spinning on the spin lock. I had the
spin lock debug enabled and added similar check for ext4_lock_group also.

diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 7113a32..9833376 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -9,6 +9,7 @@
  */
 static inline void bit_spin_lock(int bitnum, unsigned long *addr)
 {
+       unsigned int stuck = 0xffffffff;
        /*
         * Assuming the lock is uncontended, this never enters
         * the body of the outer loop. If it is contended, then
@@ -21,6 +22,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
        while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
                while (test_bit(bitnum, addr)) {
                        preempt_enable();
+                       if (!--stuck) {
+                               printk("bit_spin_lock(%p) ", __builtin_return_address(0));
+                               dump_stack();
+                               stuck = 0xffffffff;
+                       }
                        cpu_relax();
                        preempt_disable();
                }
diff --git a/kernel/sched.c b/kernel/sched.c

The kernels are non preempt and I guess we are getting stuck in the kernel
trying to allocate new blocks.

-aneesh


BUG: soft lockup - CPU#0 stuck for 11s! [fsstress:5534]
NIP: c0000000001a98d8 LR: c0000000001a98fc CTR: c0000000001900ec
REGS: c0000000ed84ab30 TRAP: 0901   Not tainted  (2.6.24-rc5-autokern1)
MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 24008428  XER: 00000020
TASK = c0000000ed846000[5534] 'fsstress' THREAD: c0000000ed848000 CPU: 0
GPR00: 0000000000000002 c0000000ed84adb0 c000000000604760 c0000000e4105cc8 
GPR04: 000000000000002b c0000000ed84ae30 0000000000000015 c000000000733c98 
GPR08: 0000000000000000 c0000000fc9905a0 0000000000000000 0000000000000000 
GPR12: 0000000024002428 c000000000542980 
NIP [c0000000001a98d8] .ext4_mb_discard_preallocations+0x1e4/0x600
LR [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600
Call Trace:
[c0000000ed84adb0] [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600 (unreliable)
[c0000000ed84afa0] [c0000000001ab6d8] .ext4_mb_new_blocks+0x18a0/0x19a4
[c0000000ed84b220] [c0000000001a14d8] .ext4_ext_get_blocks+0x970/0xb5c
[c0000000ed84b3b0] [c00000000018ec5c] .ext4_get_block+0x138/0x1c0
[c0000000ed84b470] [c0000000001163a0] .__block_prepare_write+0x1f8/0x4c8
[c0000000ed84b580] [c0000000001168b8] .block_write_begin+0xc4/0x160
[c0000000ed84b650] [c00000000018c4e8] .ext4_write_begin+0x12c/0x24c
[c0000000ed84b750] [c0000000000aa9b0] .generic_file_buffered_write+0x174/0x760
[c0000000ed84b8a0] [c0000000000ab684] .__generic_file_aio_write_nolock+0x3ec/0x444
[c0000000ed84b9e0] [c0000000000ab75c] .generic_file_aio_write+0x80/0x114
[c0000000ed84baa0] [c000000000188634] .ext4_file_write+0xc8/0x190
[c0000000ed84bb50] [c0000000000e3484] .do_sync_write+0xd0/0x130
[c0000000ed84bcf0] [c0000000000e35ec] .vfs_write+0x108/0x1c8
[c0000000ed84bd90] [c0000000000e3784] .sys_write+0x4c/0x8c
[c0000000ed84be30] [c00000000000872c] syscall_exit+0x0/0x40
Instruction dump:
794bffe3 41a2002c e8090000 780affe3 41820018 7c210b78 7c421378 e8090000 
780bffe3 4bffffec 38000002 4bffffc0 <e93d0020> 3b9d0020 3be9fff0 7fa9e000 
BUG: soft lockup - CPU#3 stuck for 11s! [fsstress:6291]
NIP: c0000000001a98c0 LR: c0000000001a98fc CTR: c0000000001900ec
REGS: c0000000ebecf0e0 TRAP: 0901   Not tainted  (2.6.24-rc5-autokern1)
MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 44008428  XER: 00000008
TASK = c0000000ebebc820[6291] 'fsstress' THREAD: c0000000ebecc000 CPU: 3
GPR00: 0000000000000002 c0000000ebecf360 c000000000604760 c0000000e4105cc8 
GPR04: 000000000000002b c0000000ebecf3e0 0000000000000015 c000000000748c98 
GPR08: 0000000000000000 c0000000fc9905a0 0000000000000001 0000000000000001 
GPR12: 0000000024002488 c000000000543100 
NIP [c0000000001a98c0] .ext4_mb_discard_preallocations+0x1cc/0x600
LR [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600
Call Trace:
[c0000000ebecf360] [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600 (unreliable)
[c0000000ebecf550] [c0000000001ab6d8] .ext4_mb_new_blocks+0x18a0/0x19a4
[c0000000ebecf7d0] [c0000000001a14d8] .ext4_ext_get_blocks+0x970/0xb5c
[c0000000ebecf960] [c00000000018fccc] .ext4_getblk+0xc4/0x2a4
[c0000000ebecfa80] [c00000000018fed0] .ext4_bread+0x24/0xec
[c0000000ebecfb20] [c0000000001941a4] .ext4_mkdir+0x1fc/0x490
[c0000000ebecfbf0] [c0000000000f181c] .vfs_mkdir+0x134/0x1d0
[c0000000ebecfc90] [c0000000000f1968] .sys_mkdirat+0xb0/0x10c
[c0000000ebecfdc0] [c00000000001721c] .compat_sys_mkdir+0x14/0x28
[c0000000ebecfe30] [c00000000000872c] syscall_exit+0x0/0x40
Instruction dump:
7d29582a 7d4048a8 7d4b0378 7d6049ad 40a2fff4 4c00012c 794bffe3 41a2002c 
e8090000 780affe3 41820018 7c210b78 <7c421378> e8090000 780bffe3 4bffffec 
BUG: soft lockup - CPU#2 stuck for 11s! [fsstress:5738]
NIP: c0000000001a98c0 LR: c0000000001a98fc CTR: c0000000001900ec
REGS: c0000000e59eab30 TRAP: 0901   Not tainted  (2.6.24-rc5-autokern1)
MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 44008428  XER: 00000018
TASK = c0000000e59e6000[5738] 'fsstress' THREAD: c0000000e59e8000 CPU: 2
GPR00: 0000000000000002 c0000000e59eadb0 c000000000604760 c0000000e4105cc8 
GPR04: 000000000000002b c0000000e59eae30 0000000000000015 c000000000741c98 
GPR08: 0000000000000000 c0000000fc9905a0 0000000000000001 0000000000000001 
GPR12: 0000000024002428 c000000000542e80 
NIP [c0000000001a98c0] .ext4_mb_discard_preallocations+0x1cc/0x600
LR [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600
Call Trace:
[c0000000e59eadb0] [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600 (unreliable)
[c0000000e59eafa0] [c0000000001ab6d8] .ext4_mb_new_blocks+0x18a0/0x19a4
[c0000000e59eb220] [c0000000001a14d8] .ext4_ext_get_blocks+0x970/0xb5c
[c0000000e59eb3b0] [c00000000018ec5c] .ext4_get_block+0x138/0x1c0
[c0000000e59eb470] [c0000000001163a0] .__block_prepare_write+0x1f8/0x4c8
[c0000000e59eb580] [c0000000001168b8] .block_write_begin+0xc4/0x160
[c0000000e59eb650] [c00000000018c4e8] .ext4_write_begin+0x12c/0x24c
[c0000000e59eb750] [c0000000000aa9b0] .generic_file_buffered_write+0x174/0x760
[c0000000e59eb8a0] [c0000000000ab684] .__generic_file_aio_write_nolock+0x3ec/0x444
[c0000000e59eb9e0] [c0000000000ab75c] .generic_file_aio_write+0x80/0x114
[c0000000e59ebaa0] [c000000000188634] .ext4_file_write+0xc8/0x190
[c0000000e59ebb50] [c0000000000e3484] .do_sync_write+0xd0/0x130
[c0000000e59ebcf0] [c0000000000e35ec] .vfs_write+0x108/0x1c8
[c0000000e59ebd90] [c0000000000e3784] .sys_write+0x4c/0x8c
[c0000000e59ebe30] [c00000000000872c] syscall_exit+0x0/0x40
Instruction dump:
7d29582a 7d4048a8 7d4b0378 7d6049ad 40a2fff4 4c00012c 794bffe3 41a2002c 
e8090000 780affe3 41820018 7c210b78 <7c421378> e8090000 780bffe3 4bffffec 
BUG: soft lockup - CPU#1 stuck for 11s! [fsstress:5942]
NIP: c0000000001a98a0 LR: c0000000001a98fc CTR: c00000000007f95c
REGS: c0000000ebc26b30 TRAP: 0901   Not tainted  (2.6.24-rc5-autokern1)
MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 24008428  XER: 00000010
TASK = c0000000ebc22000[5942] 'fsstress' THREAD: c0000000ebc24000 CPU: 1
GPR00: 0000000000000002 c0000000ebc26db0 c000000000604760 c0000000e4105cc8 
GPR04: 000000000000002b c0000000ebc26e30 0000000000000015 c00000000073ac98 
GPR08: 0000000000000000 c0000000fc9905a0 0000000000000002 0000000000000002 
GPR12: 0000000024002428 c000000000542c00 
NIP [c0000000001a98a0] .ext4_mb_discard_preallocations+0x1ac/0x600
LR [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600
Call Trace:
[c0000000ebc26db0] [c0000000001a98fc] .ext4_mb_discard_preallocations+0x208/0x600 (unreliable)
[c0000000ebc26fa0] [c0000000001ab6d8] .ext4_mb_new_blocks+0x18a0/0x19a4
[c0000000ebc27220] [c0000000001a14d8] .ext4_ext_get_blocks+0x970/0xb5c
[c0000000ebc273b0] [c00000000018ec5c] .ext4_get_block+0x138/0x1c0
[c0000000ebc27470] [c0000000001163a0] .__block_prepare_write+0x1f8/0x4c8
[c0000000ebc27580] [c0000000001168b8] .block_write_begin+0xc4/0x160
[c0000000ebc27650] [c00000000018c4e8] .ext4_write_begin+0x12c/0x24c
[c0000000ebc27750] [c0000000000aa9b0] .generic_file_buffered_write+0x174/0x760
[c0000000ebc278a0] [c0000000000ab610] .__generic_file_aio_write_nolock+0x378/0x444
[c0000000ebc279e0] [c0000000000ab75c] .generic_file_aio_write+0x80/0x114
[c0000000ebc27aa0] [c000000000188634] .ext4_file_write+0xc8/0x190
[c0000000ebc27b50] [c0000000000e3484] .do_sync_write+0xd0/0x130
[c0000000ebc27cf0] [c0000000000e35ec] .vfs_write+0x108/0x1c8
[c0000000ebc27d90] [c0000000000e3784] .sys_write+0x4c/0x8c
[c0000000ebc27e30] [c00000000000872c] syscall_exit+0x0/0x40
Instruction dump:
e92a0030 e94a4208 7f6b5c36 3929ffff 796b1f24 7f694838 7d6b502a 79291f24 
7d29582a 7d4048a8 7d4b0378 7d6049ad <40a2fff4> 4c00012c 794bffe3 41a2002c 
-- 0:conmux-control -- time-stamp -- Dec/20/07  2:53:52 --
-- 0:conmux-control -- time-stamp -- Dec/20/07  3:01:55 --
(bot:conmon-payload) disconnected



x86 gives me this stack.
-----------------------

BUG: soft lockup - CPU#6 stuck for 11s! [fsstress:14048]

Pid: 14048, comm: fsstress Not tainted (2.6.24-rc5-autokern1 #1)
EIP: 0060:[<c01d2f1c>] EFLAGS: 00000282 CPU: 6
EIP is at ext4_mb_discard_preallocations+0x13a/0x439
EAX: 00000002 EBX: dbd50180 ECX: 00000007 EDX: ffffffba
ESI: e44599c8 EDI: dbd50194 EBP: e44599b0 ESP: e419fb5c
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
CR0: 8005003b CR2: 080eae1c CR3: 373cd000 CR4: 000006b0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 [<c01be636>] ext4_mark_iloc_dirty+0x382/0x3dd
 [<c01be7df>] ext4_mark_inode_dirty+0x14e/0x15e
 [<c01cfe12>] ext4_mb_store_history+0x14b/0x153
 [<c01d51b9>] ext4_mb_new_blocks+0x17de/0x1891
 [<c01ce529>] __ext4_journal_dirty_metadata+0x15/0x3c
 [<c01be7df>] ext4_mark_inode_dirty+0x14e/0x15e
 [<c0170f1b>] __find_get_block+0x17b/0x185
 [<c01cbdc8>] ext4_ext_find_extent+0x5b/0x217
 [<c01cd44c>] ext4_ext_get_blocks+0x74d/0x924
 [<c01be636>] ext4_mark_iloc_dirty+0x382/0x3dd
 [<c01c086b>] ext4_get_blocks_wrap+0xc7/0x109
 [<c01c14e6>] ext4_getblk+0x4f/0x16f
 [<c01c161f>] ext4_bread+0x19/0x72
 [<c01c4f32>] ext4_mkdir+0x150/0x2d2
 [<c01d72d4>] ext4_permission+0x0/0xa
 [<c015cccd>] vfs_mkdir+0x8f/0xd3
 [<c015cd99>] sys_mkdirat+0x88/0xbf
 [<c0154f4b>] __fput+0x116/0x140
 [<c01655c1>] mntput_no_expire+0x11/0x63
 [<c0152cf4>] filp_close+0x4f/0x56
 [<c015cdef>] sys_mkdir+0x1f/0x23
 [<c01024ba>] sysenter_past_esp+0x5f/0x85
 =======================
BUG: soft lockup - CPU#7 stuck for 11s! [fsstress:14017]

Pid: 14017, comm: fsstress Not tainted (2.6.24-rc5-autokern1 #1)
EIP: 0060:[<c01d2f32>] EFLAGS: 00000246 CPU: 7
EIP is at ext4_mb_discard_preallocations+0x150/0x439
EAX: dbd50180 EBX: e44599b8 ECX: 00000007 EDX: ffffffff
ESI: e44599c8 EDI: dbd50194 EBP: e44599b0 ESP: f66737cc
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
CR0: 8005003b CR2: b7f02840 CR3: 36443000 CR4: 000006b0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 [<c01be636>] ext4_mark_iloc_dirty+0x382/0x3dd
 [<c01be7df>] ext4_mark_inode_dirty+0x14e/0x15e
 [<c0170f1b>] __find_get_block+0x17b/0x185
 [<c01d000c>] ext4_mb_init+0x1f2/0x85a
 [<c01d51b9>] ext4_mb_new_blocks+0x17de/0x1891
 [<c01ce529>] __ext4_journal_dirty_metadata+0x15/0x3c
 [<c01be7df>] ext4_mark_inode_dirty+0x14e/0x15e
 [<c01cfe12>] ext4_mb_store_history+0x14b/0x153
 [<c01c63b1>] __ext4_journal_stop+0x19/0x34
 [<c0170f1b>] __find_get_block+0x17b/0x185
 [<c01cbed4>] ext4_ext_find_extent+0x167/0x217
 [<c01cd44c>] ext4_ext_get_blocks+0x74d/0x924
 [<c01c086b>] ext4_get_blocks_wrap+0xc7/0x109
 [<c01c1388>] ext4_da_get_block_write+0x6f/0x17e
 [<c01c1319>] ext4_da_get_block_write+0x0/0x17e
 [<c0174a9e>] mpage_da_map_blocks+0x6d/0x230
 [<c01cb78e>] __ext4_ext_check_header+0x77/0xfa
 [<c0170f1b>] __find_get_block+0x17b/0x185
 [<c01cb78e>] __ext4_ext_check_header+0x77/0xfa
 [<c0174cb5>] mpage_add_bh_to_extent+0x54/0x6b
 [<c017592b>] __mpage_da_writepage+0xb1/0xc6
 [<c0376e5c>] _read_unlock_irq+0x5/0x7
 [<c0136ce7>] find_get_pages_tag+0x76/0x80
 [<c013d10a>] write_cache_pages+0x169/0x280
 [<c017587a>] __mpage_da_writepage+0x0/0xc6
 [<c0175858>] mpage_da_writepages+0x5d/0x7f
 [<c01c0827>] ext4_get_blocks_wrap+0x83/0x109
 [<c01c12ce>] ext4_da_get_block_prep+0x4c/0x97
 [<c01c1319>] ext4_da_get_block_write+0x0/0x17e
 [<c01bd9db>] ext4_da_writepages+0x0/0xa
 [<c013d265>] do_writepages+0x20/0x33
 [<c0136ba6>] __filemap_fdatawrite_range+0x65/0x70
 [<c0137ae6>] filemap_fdatawrite+0x23/0x27
 [<c0137afb>] filemap_write_and_wait+0x11/0x29
 [<c01384a8>] generic_file_buffered_write+0x57a/0x5a0
 [<c011c05c>] current_fs_time+0x13/0x15
 [<c0139437>] __generic_file_aio_write_nolock+0x3c3/0x4b5
 [<c0139581>] generic_file_aio_write+0x58/0xb6
 [<c01bc5a4>] ext4_file_write+0xb4/0x124
 [<c0153eb8>] do_sync_write+0xc7/0x10a
 [<c0127898>] autoremove_wake_function+0x0/0x33
 [<c0156e44>] sys_fstat64+0x1e/0x23
 [<c0153df1>] do_sync_write+0x0/0x10a
 [<c0154495>] vfs_write+0x89/0x10b
 [<c0154b91>] sys_write+0x41/0x67
 [<c01024ba>] sysenter_past_esp+0x5f/0x85
 =======================
(bot:conmon-payload) disconnecte
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ