lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAO6TR8Wanqr25=dmMbOVZd3-2GSzP+yDKA4KkX4JpbNwYRupHw@mail.gmail.com>
Date:	Mon, 18 Jan 2016 19:31:16 -0700
From:	Jeff Merkey <linux.mdb@...il.com>
To:	LKML <linux-kernel@...r.kernel.org>
Subject: Re: [BUG REPORT] ktime_get_ts64 causes Hard Lockup

output from objdump provided:

static __always_inline u32
__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	u32 ret = 0;
     27b:	31 d2                	xor    %edx,%edx
	while (dividend >= divisor) {
		/* The following asm() prevents the compiler from
		   optimising this loop into a modulo operation.  */
		asm("" : "+rm"(dividend));

		dividend -= divisor;
     27d:	48 2d 00 ca 9a 3b    	sub    $0x3b9aca00,%rax
		ret++;
     283:	83 c2 01             	add    $0x1,%edx
static __always_inline u32
__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	u32 ret = 0;

	while (dividend >= divisor) {
     286:	48 3d ff c9 9a 3b    	cmp    $0x3b9ac9ff,%rax
     28c:	77 ef                	ja     27d <ktime_get_ts64+0x9d>
 * This must always be inlined because its used from the x86-64 vdso,
 * which cannot call other kernel functions.
 */

I guess if dividend and divisor get out of whack this function loops
forever.  So I sent this to the maintainers, which is the list of
"engineers" who actually wrote and understand this section of code.  I
would like to get this fixed.

Thanks

Jeff

On 1/18/16, Jeff Merkey <linux.mdb@...il.com> wrote:
> On 1/18/16, Jeff Merkey <linux.mdb@...il.com> wrote:
>> If I suspend all processors in the NMI handler while in the debugger
>> console at an active breakpoint, and then I leave the system in the
>> console for about 20 minutes with the processors spinning, then exit
>> and release the processors, ktime_get_ts64 enters an infinite loop and
>> triggers the Hard Lockup detector.   Nice having a debugger to just
>> step into the NMI handlers and find this stuff now.
>>
>> The offending code is:
>>
>> (2)> u ktime_get_ts64+9d
>> <<<  this section gets stuck in an infinite loop.
>> 0xffffffff810ede1d 482D00CA9A3B    sub    rax,0x3b9aca00
>> 0xffffffff810ede23 83C201          add    edx,0x1
>> 0xffffffff810ede26 483DFFC99A3B    cmp    rax,0x3b9ac9ff
>> 0xffffffff810ede2c 77EF            ja     ktime_get_ts64+0x9d
>> (0xffffffff810ede1d) (up)
>> <<<
>> 0xffffffff810ede2e 4801CA          add    rdx,rcx
>> 0xffffffff810ede31 48894308        mov    QWORD PTR
>> [rbx+8]=0xFFFFFFFFA0774E01,rax
>> 0xffffffff810ede35 488913          mov    QWORD PTR
>> [rbx]=0xFFFFFFFFA0742000,rdx
>> 0xffffffff810ede38 5B              pop    rbx
>> 0xffffffff810ede39 415C            pop    r12
>> 0xffffffff810ede3b 415D            pop    r13
>> 0xffffffff810ede3d 5D              pop    rbp
>> 0xffffffff810ede3e C3              ret
>> 0xffffffff810ede3f 31D2            xor    edx,edx
>> 0xffffffff810ede41 EBEB            jmp    ktime_get_ts64+0xae
>> (0xffffffff810ede2e) (up)
>> 0xffffffff810ede43 BE11030000      mov    esi,0x311
>> 0xffffffff810ede48 48C7C751C38D81  mov    rdi,0xffffffff818dc351
>> 0xffffffff810ede4f E82C12F9FF      call   warn_slowpath_null
>> 0xffffffff810ede54 E946FFFFFF      jmp    ktime_get_ts64+0x1f
>> (0xffffffff810edd9f) (up)
>> 0xffffffff810ede59 F390            pause
>> 0xffffffff810ede5b E942FFFFFF      jmp    ktime_get_ts64+0x22
>> (0xffffffff810edda2) (up)
>>
>> Same code in GDB format
>>
>> (2)> id ktime_get_ts64+9d
>> <<<
>> 0xffffffff810ede1d ktime_get_ts64+0x9d:    sub    $0x3b9aca00,%rax
>> 0xffffffff810ede23 ktime_get_ts64+0xa3:    add    $0x1,%edx
>> 0xffffffff810ede26 ktime_get_ts64+0xa6:    cmp    $0x3b9ac9ff,%rax
>> 0xffffffff810ede2c ktime_get_ts64+0xac:    ja     0xffffffff810ede1d
>> ktime_get_ts64+0x9d (up)
>> <<<
>> 0xffffffff810ede2e ktime_get_ts64+0xae:    add    %rcx,%rdx
>> 0xffffffff810ede31 ktime_get_ts64+0xb1:    mov    %rax,0x8(%rbx)
>> 0xffffffff810ede35 ktime_get_ts64+0xb5:    mov    %rdx,(%rbx)
>> 0xffffffff810ede38 ktime_get_ts64+0xb8:    pop    %rbx
>> 0xffffffff810ede39 ktime_get_ts64+0xb9:    pop    %r12
>> 0xffffffff810ede3b ktime_get_ts64+0xbb:    pop    %r13
>> 0xffffffff810ede3d ktime_get_ts64+0xbd:    pop    %rbp
>> 0xffffffff810ede3e ktime_get_ts64+0xbe:    retq
>> 0xffffffff810ede3f ktime_get_ts64+0xbf:    xor    %edx,%edx
>> 0xffffffff810ede41 ktime_get_ts64+0xc1:    jmp    0xffffffff810ede2e
>> ktime_get_ts64+0xae (up)
>> 0xffffffff810ede43 ktime_get_ts64+0xc3:    mov    $0x311,%esi
>> 0xffffffff810ede48 ktime_get_ts64+0xc8:    mov
>> $0xffffffff818dc351,%rdi
>> 0xffffffff810ede4f ktime_get_ts64+0xcf:    callq  0xffffffff8107f080
>> warn_slowpath_null
>> 0xffffffff810ede54 ktime_get_ts64+0xd4:    jmpq   0xffffffff810edd9f
>> ktime_get_ts64+0x1f (up)
>> 0xffffffff810ede59 ktime_get_ts64+0xd9:    pause
>> 0xffffffff810ede5b ktime_get_ts64+0xdb:    jmpq   0xffffffff810edda2
>> ktime_get_ts64+0x22 (up)
>> (2)> g
>>
>>
>> What is strange is the math it's doing.  It is subtracting a fixed
>> value from rax then comparing the value.  It looks like this is a case
>> where the value may have wrapped and the code just wasn't set up to
>> handle it.
>>
>> 0xffffffff810ede1d 482D00CA9A3B    sub    rax,0x3b9aca00
>> 0xffffffff810ede23 83C201          add    edx,0x1
>> 0xffffffff810ede26 483DFFC99A3B    cmp    rax,0x3b9ac9ff
>> 0xffffffff810ede2c 77EF            ja     ktime_get_ts64+0x9d
>> (0xffffffff810ede1d) (up)
>>
>> The C code is:
>>
>>
>> /**
>>  * ktime_get_ts64 - get the monotonic clock in timespec64 format
>>  * @ts:		pointer to timespec variable
>>  *
>>  * The function calculates the monotonic clock from the realtime
>>  * clock and the wall_to_monotonic offset and stores the result
>>  * in normalized timespec64 format in the variable pointed to by @ts.
>>  */
>> void ktime_get_ts64(struct timespec64 *ts)
>> {
>> 	struct timekeeper *tk = &tk_core.timekeeper;
>> 	struct timespec64 tomono;
>> 	s64 nsec;
>> 	unsigned int seq;
>>
>> 	WARN_ON(timekeeping_suspended);
>>
>> 	do {
>> 		seq = read_seqcount_begin(&tk_core.seq);
>> 		ts->tv_sec = tk->xtime_sec;
>> 		nsec = timekeeping_get_ns(&tk->tkr_mono);
>> 		tomono = tk->wall_to_monotonic;
>> <<<
>> 	} while (read_seqcount_retry(&tk_core.seq, seq));
>> <<<
>> 	ts->tv_sec += tomono.tv_sec;
>> 	ts->tv_nsec = 0;
>> 	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
>> }
>> EXPORT_SYMBOL_GPL(ktime_get_ts64);
>>
>> Any ideas how to fix this problem?  That do {} while gets stuck there.
>>
>> Jeff
>>
>
> I just verified that both kgdb and kdb trigger this bug as well if you
> hold the processors suspended for about 20-45 minutes.
>
> Jeff
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ