lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	29 Apr 2016 23:04:31 -0400
From:	"George Spelvin" <linux@...izon.com>
To:	linux@...izon.com, torvalds@...ux-foundation.org
Cc:	linux-kernel@...r.kernel.org, tglx@...utronix.de
Subject: Re: [patch 2/7] lib/hashmod: Add modulo based hash mechanism

> Not doing it for 64-bit constants makes no sense if it just uses the
> trivial Booth's algorithm version.

AFAICT, gcc 5 *does* optimize 64-bit multiplies by constants.
Does the belief that it doesn't date back to some really old
version?

There's still a threshold where it just punts to the multiplier.

Some examples, x86-64 (gcc 6.0.1) and aarch64 (gcc 5.3.1).
Note the difference in the multiply-by-12345 routine.

	return x*9;
mul9:
        leaq    (%rdi,%rdi,8), %rax
        ret
mul9:
        add     x0, x0, x0, lsl 3
        ret

	return x*10;
mul10:
        leaq    (%rdi,%rdi,4), %rax
        addq    %rax, %rax
        ret
mul10:
        lsl     x1, x0, 3
        add     x0, x1, x0, lsl 1
        ret

	return x*127;
mul127:
        movq    %rdi, %rax
        salq    $7, %rax
        subq    %rdi, %rax
        ret
mul127:
        lsl     x1, x0, 7
        sub     x0, x1, x0
        ret

	return x*12345;
mul12345:
        imulq   $12345, %rdi, %rax
        ret
mul12345:
        lsl     x1, x0, 3
        sub     x1, x1, x0
        lsl     x1, x1, 1
        sub     x1, x1, x0
        lsl     x1, x1, 3
        sub     x1, x1, x0
        lsl     x1, x1, 3
        sub     x0, x1, x0
        lsl     x1, x0, 4
        sub     x0, x1, x0
        ret

        uint64_t y = (x << 9) - (x << 3) + x;
        return x + (x << 14) - (y << 3);
mul12345_manual:
        movq    %rdi, %rdx
        salq    $14, %rax
        salq    $9, %rdx
        addq    %rdi, %rax
        addq    %rdi, %rdx
        salq    $3, %rdi
        subq    %rdi, %rdx
        salq    $3, %rdx
        subq    %rdx, %rax
        ret
mul12345_manual:
        lsl     x2, x0, 9
        lsl     x1, x0, 14
        add     x2, x2, x0
        add     x1, x1, x0
        sub     x0, x2, x0, lsl 3
        sub     x0, x1, x0, lsl 3
        ret

	return x*2654435769:
mul2654435769:
        movl    $2654435769, %eax
        imulq   %rdi, %rax
        ret
mul2654435769:
        mov     x1, 31161
        movk    x1, 0x9e37, lsl 16
        mul     x0, x0, x1
        ret

The problem with variant code paths like mul12345_manual is that the
infrastructure required to determine which to use is many times larger
than the code itself.  :-(

Powered by blists - more mailing lists