lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 17 May 2016 10:39:15 -0700
From:	David Daney <ddaney@...iumnetworks.com>
To:	Peter Hurley <peter@...leysoftware.com>,
	Dann Frazier <dann.frazier@...onical.com>
CC:	Ming Lei <ming.lei@...onical.com>,
	Scot Doyle <lkml14@...tdoyle.com>,
	David Airlie <airlied@...ux.ie>,
	<dri-devel@...ts.freedesktop.org>,
	"Chintakuntla, Radha" <Radha.Chintakuntla@...iumnetworks.com>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Jiri Slaby <jslaby@...e.com>, Pavel Machek <pavel@....cz>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: ast: cursor flashing softlockups

I can confirm this.  The cursor is blinking along nicely with a 200mS 
on/off time then with this patch installed:

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 6e92917..7855446 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -402,6 +402,8 @@ static void cursor_timer_handler(unsigned long dev_addr)
  	struct fbcon_ops *ops = info->fbcon_par;

  	queue_work(system_power_efficient_wq, &info->queue);
+	if (WARN_ON(ops->cur_blink_jiffies < 10))
+		ops->cur_blink_jiffies = 200;
  	mod_timer(&ops->cursor_timer, jiffies + ops->cur_blink_jiffies);
  }

@@ -417,6 +419,8 @@ static void fbcon_add_cursor_timer(struct fb_info *info)

  		init_timer(&ops->cursor_timer);
  		ops->cursor_timer.function = cursor_timer_handler;
+		if (WARN_ON(ops->cur_blink_jiffies < 10))
+			ops->cur_blink_jiffies = 200;
  		ops->cursor_timer.expires = jiffies + ops->cur_blink_jiffies;
  		ops->cursor_timer.data = (unsigned long ) info;
  		add_timer(&ops->cursor_timer);
@@ -1096,6 +1100,7 @@ static void fbcon_init(struct vc_data *vc, int init)

  	ops = info->fbcon_par;
  	ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
+	WARN_ON(ops->cur_blink_jiffies < 10);
  	p->con_rotate = initial_rotation;
  	set_blitting_type(vc, info);

@@ -1310,6 +1315,7 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
   	int c = scr_readw((u16 *) vc->vc_pos);

  	ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
+	WARN_ON(ops->cur_blink_jiffies < 10);

  	if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1)
  		return;

-------------------------------------------------------------

I get:


[   29.386066] ------------[ cut here ]------------
[   29.386080] WARNING: CPU: 0 PID: 1688 at 
drivers/video/console/fbcon.c:1103 fbcon_init+0x47c/0x4b8
[   29.386145] Modules linked in: vfat(E) fat(E) aes_ce_blk(E) 
ablk_helper(E) cryptd(E) aes_ce_cipher(E) ghash_ce(E) sha2_ce(E) 
sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E) nicvf(E) ast(E) 
i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) 
sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E) nicpf(E) 
thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E) 
dm_region_hash(E) dm_log(E) dm_mod(E)
[   29.386147]
[   29.386153] CPU: 0 PID: 1688 Comm: systemd-logind Tainted: G 
    E   4.6.0-rc3-arm64next+ #278
[   29.386155] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX 
CRB-2S, BIOS 0.3 Apr 25 2016
[   29.386159] task: fffffe0fd1109d80 ti: fffffe0fd119c000 task.ti: 
fffffe0fd119c000
[   29.386163] PC is at fbcon_init+0x47c/0x4b8
[   29.386167] LR is at fbcon_init+0x144/0x4b8
[   29.386170] pc : [<fffffc0008468930>] lr : [<fffffc00084685f8>] 
pstate: 80000145
[   29.386172] sp : fffffe0fd119f900
[   29.386179] x29: fffffe0fd119f900 x28: fffffe0fdc059c00
[   29.386186] x27: fffffe001c7c8800 x26: fffffe0fd6027910
[   29.386192] x25: fffffe0fd6027800 x24: fffffc0009b56f98
[   29.386198] x23: 0000000000000001 x22: fffffc0008d1a000
[   29.386205] x21: fffffe0ff4062000 x20: fffffc0009b57018
[   29.386211] x19: fffffc0009b56000 x18: 000002aae7bd51f0
[   29.386222] x17: 000003ffb542f9f0 x16: fffffc0008242b80
[   29.386228] x15: 000002aae7bdb328 x14: 0000000000000006
[   29.386234] x13: 000002aae7bc72c8 x12: 0000000000000000
[   29.386240] x11: 0000000000000000 x10: 0000000000000000
[   29.386247] x9 : 0000000000000000 x8 : fffffe001c7c8b18
[   29.386253] x7 : 0000000000000000 x6 : 000000000000007f
[   29.386259] x5 : fffffc0008c91640 x4 : 0000000000000000
[   29.386266] x3 : 0000000000000800 x2 : fffffe001c7c8ce8
[   29.386272] x1 : fffffe0fd4df4e00 x0 : 0000000000000000
[   29.386274]
[   29.386277] ---[ end trace 6c32ddc01008c9ba ]---
[   29.386280] Call trace:
[   29.386284] Exception stack(0xfffffe0fd119f730 to 0xfffffe0fd119f850)
[   29.386288] f720:                                   fffffc0009b56000 
fffffc0009b57018
[   29.386292] f740: fffffe0fd119f900 fffffc0008468930 0000000080000145 
000000000000003d
[   29.386296] f760: fffffc0008c36ce8 fffffc00081c2a30 fffffe0fd119f7f0 
fffffc00080f47d8
[   29.386299] f780: fffffe0fd119f800 fffffc00080f47d8 000000000000010c 
fffffe0fd1109d80
[   29.386303] f7a0: 0000000000000000 000000000025810c fffffe0fd110a5a0 
0000000000000000
[   29.386307] f7c0: fffffc000880d2e8 0000000000000000 fffffe0fd119f850 
fffffc00080f47d8
[   29.386311] f7e0: 0000000000000000 fffffe0fd4df4e00 fffffe001c7c8ce8 
0000000000000800
[   29.386314] f800: 0000000000000000 fffffc0008c91640 000000000000007f 
0000000000000000
[   29.386318] f820: fffffe001c7c8b18 0000000000000000 0000000000000000 
0000000000000000
[   29.386321] f840: 0000000000000000 000002aae7bc72c8
[   29.386327] [<fffffc0008468930>] fbcon_init+0x47c/0x4b8
[   29.386332] [<fffffc00084d6e3c>] visual_init+0xbc/0x114
[   29.386336] [<fffffc00084d9938>] vc_allocate+0x108/0x1e0
[   29.386340] [<fffffc00084d9a44>] con_install+0x34/0x100
[   29.386347] [<fffffc00084c2f44>] tty_init_dev+0x74/0x1a8
[   29.386351] [<fffffc00084c3560>] tty_open+0x4e8/0x5a8
[   29.386357] [<fffffc0008248a78>] chrdev_open+0xa8/0x1ac
[   29.386361] [<fffffc0008241288>] do_dentry_open+0x1e8/0x300
[   29.386364] [<fffffc0008242694>] vfs_open+0x6c/0x7c
[   29.386370] [<fffffc0008252864>] do_last+0x12c/0xc24
[   29.386374] [<fffffc00082533d8>] path_openat+0x7c/0x2b8
[   29.386377] [<fffffc00082546d4>] do_filp_open+0x78/0xd4
[   29.386381] [<fffffc0008242a74>] do_sys_open+0x150/0x214
[   29.386384] [<fffffc0008242bbc>] SyS_openat+0x3c/0x48
[   29.386389] [<fffffc000808538c>] __sys_trace_return+0x0/0x4
.
.
.
[  OK  ] Started Command Scheduler.
          Starting Command Scheduler...
[   33.395141] ------------[ cut here ]------------
[   33.399773] WARNING: CPU: 0 PID: 0 at 
drivers/video/console/fbcon.c:405 cursor_timer_handler+0x5c/0x6c
[   33.409072] Modules linked in: ip6t_REJECT(E) nf_reject_ipv6(E) 
ipt_REJECT(E) nf_reject_ipv4(E) xt_conntrack(E) ebtable_nat(E) 
ebtable_broute(E) bridge(E) stp(E) llc(E) ebtable_filter(E) ebtables(E) 
ip6table_nat(E) nf_conntrack_ipv6(E) nf_defrag_ipv6(E) nf_nat_ipv6(E) 
ip6table_mangle(E) ip6table_security(E) ip6table_raw(E) 
ip6table_filter(E) ip6_tables(E) iptable_nat(E) nf_conntrack_ipv4(E) 
nf_defrag_ipv4(E) nf_nat_ipv4(E) nf_nat(E) nf_conntrack(E) 
iptable_mangle(E) iptable_security(E) iptable_raw(E) iptable_filter(E) 
vfat(E) fat(E) aes_ce_blk(E) ablk_helper(E) cryptd(E) aes_ce_cipher(E) 
ghash_ce(E) sha2_ce(E) sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E) 
nicvf(E) ast(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) 
sysfillrect(E) sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E) 
nicpf(E) thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E) 
dm_region_hash(E) dm_log(E) dm_mod(E)
[   33.488864]
[   33.490358] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W   E 
4.6.0-rc3-arm64next+ #278
[   33.498877] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX 
CRB-2S, BIOS 0.3 Apr 25 2016
[   33.507571] task: fffffc0008c79c80 ti: fffffc0008c50000 task.ti: 
fffffc0008c50000
[   33.515053] PC is at cursor_timer_handler+0x5c/0x6c
[   33.519930] LR is at cursor_timer_handler+0x30/0x6c
[   33.524804] pc : [<fffffc0008467490>] lr : [<fffffc0008467464>] 
pstate: 80000145
[   33.532198] sp : ffffff0ff9dc3da0
[   33.535513] x29: ffffff0ff9dc3da0 x28: fffffc0008c70be8
[   33.540836] x27: fffffc0008c70000 x26: fffffc0008d68fb3
[   33.546158] x25: fffffc0008c70000 x24: fffffc0008d6a000
[   33.551483] x23: fffffe0fd6027800 x22: fffffc0008467434
[   33.556809] x21: 0000000000000101 x20: fffffe0fd6027800
[   33.562138] x19: fffffe0fdc059c00 x18: 000002aadcf70050
[   33.567464] x17: 000003ff95cfa228 x16: fffffc000829628c
[   33.572786] x15: 00000000000000dd x14: ffffffc2c3bfa000
[   33.578114] x13: 003d093352dc0000 x12: 00000000003d0900
[   33.583440] x11: 0000000000000000 x10: 00003d09334ec400
[   33.588765] x9 : fffffc0008c50000 x8 : fffffc0008851320
[   33.594090] x7 : 00000000ad583ee9 x6 : 0000000000000001
[   33.599412] x5 : fffffc0008c79c80 x4 : fffffc0009012c28
[   33.604743] x3 : 0000000000000004 x2 : 0000000000000002
[   33.610074] x1 : 0000000000000000 x0 : 0000000000000001
[   33.615395]
[   33.616886] ---[ end trace 6c32ddc01008c9c3 ]---
[   33.621506] Call trace:
[   33.623951] Exception stack(0xffffff0ff9dc3bd0 to 0xffffff0ff9dc3cf0)
[   33.630390] 3bc0:                                   fffffe0fdc059c00 
fffffe0fd6027800
[   33.638221] 3be0: ffffff0ff9dc3da0 fffffc0008467490 0000000080000145 
000000000000003d
[   33.646050] 3c00: fffffe0fd46b5700 ffffff0ff9dd8f00 fffffc0008d69de0 
0000000000000000
[   33.653882] 3c20: fffffc0008c36ce8 fffffc0008c24b28 fffffc0008c70be8 
fffffc0008da5b48
[   33.661708] 3c40: ffffff0ff9dc3cb0 fffffc0008814614 ffffff0ff9dd8f00 
fffffc0008d68ea4
[   33.669535] 3c60: fffffc0008d69de0 0000000000000000 fffffc0008c36ce8 
fffffc0008c24b28
[   33.677361] 3c80: 0000000000000001 0000000000000000 0000000000000002 
0000000000000004
[   33.685191] 3ca0: fffffc0009012c28 fffffc0008c79c80 0000000000000001 
00000000ad583ee9
[   33.693017] 3cc0: fffffc0008851320 fffffc0008c50000 00003d09334ec400 
0000000000000000
[   33.700845] 3ce0: 00000000003d0900 003d093352dc0000
[   33.705726] [<fffffc0008467490>] cursor_timer_handler+0x5c/0x6c
[   33.711648] [<fffffc0008117bd8>] call_timer_fn+0xc0/0x3e4
[   33.717047] [<fffffc0008118114>] run_timer_softirq+0x1ec/0x39c
[   33.722879] [<fffffc0008081794>] __do_softirq+0x104/0x598
[   33.728278] [<fffffc00080a2ca4>] irq_exit+0x9c/0xc0
[   33.733155] [<fffffc0008102384>] __handle_domain_irq+0x98/0xfc
[   33.739009] [<fffffc0008081594>] gic_handle_irq+0x94/0x190
[   33.744492] Exception stack(0xffffff0ff9dc3fc0 to 0xffffff0ff9dc40e0)
[   33.750930] 3fc0: fffffc0008c53da0 fffffc0008d6a000 fffffc0008c53ec0 
fffffc00080860c0
[   33.758756] 3fe0: 0000000060000145 fffffc0008a33c98 ffffff0ff9dc0020 
0000000000000000
[   33.766582] 4000: fffffc0008c53ec0 fffffc0008c53da0 0000000000000000 
0000000000000000
[   33.774408] 4020: fffffe0fee5412d0 0000000000000000 0000000000000000 
0000000000000000
[   33.782234] 4040: 0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[   33.790060] 4060: 0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[   33.797887] 4080: 0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[   33.805715] 40a0: 0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[   33.813547] 40c0: 0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[   33.821374] [<fffffc0008084de4>] el1_irq+0xa4/0x114
[   33.826251] [<fffffc00080860c0>] arch_cpu_idle+0x40/0x1fc
[   33.831651] [<fffffc00080e9970>] default_idle_call+0x20/0x34
[   33.837310] [<fffffc00080e9ae4>] cpu_startup_entry+0x160/0x314
[   33.843142] [<fffffc000880d968>] rest_init+0x150/0x160
[   33.848283] [<fffffc0008b70b90>] start_kernel+0x3b8/0x3cc
[   33.853681] [<fffffc0008b701c4>] __primary_switched+0x30/0x6c





On 05/17/2016 07:29 AM, Peter Hurley wrote:
> [ +to Scot Doyle ]
>
> Scot, please take a look at this soft lockup.
>
> Regards,
> Peter Hurley
>
>
> Hi Ming,
>
> On 05/17/2016 02:12 AM, Ming Lei wrote:
>> Hi,
>>
>> On Tue, May 17, 2016 at 4:07 AM, Dann Frazier
>> <dann.frazier@...onical.com> wrote:
>>> Hi,
>>>   I'm observing a soft lockup issue w/ the ASPEED controller on an
>>> arm64 server platform. This was originally seen on Ubuntu's 4.4
>>> kernel, but it is reproducible w/ vanilla 4.6-rc7 as well.
>>>
>>> [   32.792656] NMI watchdog: BUG: soft lockup - CPU#38 stuck for 22s!
>>> [swapper/38:0]
>>>
>>> I observe this just once each time I boot into debian-installer (I'm
>>> using a serial console, but the ast module gets loaded during
>>> startup).
>>
>> I have figured out that it is caused by 'mod_timer(timer, jiffies)' and
>> 'ops->cur_blink_jiffies' is observed as zero in cursor_timer_handler()
>> when the issue happened.
>
> Thanks for tracking this down.
>
> This softlockup looks to be caused by:
>
> 	commit 27a4c827c34ac4256a190cc9d24607f953c1c459
> 	Author: Scot Doyle <lkml14@...tdoyle.com>
> 	Date:   Thu Mar 26 13:56:38 2015 +0000
>
> 	    fbcon: use the cursor blink interval provided by vt
>
> 	    vt now provides a cursor blink interval via vc_data. Use this
> 	    interval instead of the currently hardcoded 200 msecs. Store it in
> 	    fbcon_ops to avoid locking the console in cursor_timer_handler().
>
> 	    Signed-off-by: Scot Doyle <lkml14@...tdoyle.com>
> 	    Acked-by: Pavel Machek <pavel@....cz>
> 	    Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
>
> and
>
> 	commit bd63364caa8df38bad2b25b11b2a1b849475cce5
> 	Author: Scot Doyle <lkml14@...tdoyle.com>
> 	Date:   Thu Mar 26 13:54:39 2015 +0000
>
> 	    vt: add cursor blink interval escape sequence
>
> 	    Add an escape sequence to specify the current console's cursor blink
> 	    interval. The interval is specified as a number of milliseconds until
> 	    the next cursor display state toggle, from 50 to 65535. /proc/loadavg
> 	    did not show a difference with a one msec interval, but the lower
> 	    bound is set to 50 msecs since slower hardware wasn't tested.
>
> 	    Store the interval in the vc_data structure for later access by fbcon,
> 	    initializing the value to fbcon's current hardcoded value of 200 msecs.
>
> 	    Signed-off-by: Scot Doyle <lkml14@...tdoyle.com>
> 	    Acked-by: Pavel Machek <pavel@....cz>
> 	    Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
>
>
>
>> Looks it is a real fbcon/vt issue, see following:
>>
>> fbcon_init()
>>      <-.con_init
>>            <-visual_init()
>>
>> reset_terminal()
>>      <-vc_init()
>>
>> vc->vc_cur_blink_ms is just set in reset_terminal() from vc_init() path,
>> and ops->cur_blink_jiffies is figured out from vc->vc_cur_blink_ms
>> in fbcon_init().
>>
>> And visual_init() is always run before vc_init(), so ops->cur_blink_jiffies
>> is initialized as zero and cause the soft lockup issue finally.
>>
>> Thanks,
>> Ming
>>
>>>
>>> perf shows that the CPU caught by the NMI is typically in code
>>> updating the cursor timer:
>>>
>>> -   16.92%  swapper  [kernel.kallsyms]      [k] _raw_spin_unlock_irqrestore
>>>     - _raw_spin_unlock_irqrestore
>>>        + 16.87% mod_timer
>>>        + 0.05% cursor_timer_handler
>>> -   12.15%  swapper  [kernel.kallsyms]      [k] queue_work_on
>>>     - queue_work_on
>>>        + 12.00% cursor_timer_handler
>>>        + 0.15% call_timer_fn
>>> +   10.98%  swapper  [kernel.kallsyms]      [k] run_timer_softirq
>>> -    2.23%  swapper  [kernel.kallsyms]      [k] mod_timer
>>>     - mod_timer
>>>        + 1.97% cursor_timer_handler
>>>        + 0.26% call_timer_fn
>>>
>>> During the same period, I can see that another CPU is actively
>>> executing the timer function:
>>>
>>> -   42.18%  kworker/u96:2  [kernel.kallsyms]  [k] ww_mutex_unlock
>>>     - ww_mutex_unlock
>>>        - 40.70% ast_dirty_update
>>>             ast_imageblit
>>>             soft_cursor
>>>             bit_cursor
>>>             fb_flashcursor
>>>             process_one_work
>>>             worker_thread
>>>             kthread
>>>             ret_from_fork
>>>        + 1.48% ast_imageblit
>>> -   40.15%  kworker/u96:2  [kernel.kallsyms]  [k] __memcpy_toio
>>>     - __memcpy_toio
>>>        + 31.54% ast_dirty_update
>>>        + 8.61% ast_imageblit
>>>
>>> Using the graph function tracer on fb_flashcursor(), I see that
>>> ast_dirty_update usually takes around 60 us, in which it makes 16
>>> calls to __memcpy_toio(). However, there is always one instance on
>>> every boot of the installer where ast_dirty_update() takes ~98 *ms* to
>>> complete, during which it makes 743 calls to __memcpy_toio(). While
>>> that  doesn't directly account for the full 22s, I do wonder if that
>>> maybe a smoking gun.
>>>
>>> fyi, this is being tracked at: https://bugs.launchpad.net/bugs/1574814
>>>
>>>    -dann
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ