[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20160430030431.20266.qmail@ns.horizon.com>
Date: 29 Apr 2016 23:04:31 -0400
From: "George Spelvin" <linux@...izon.com>
To: linux@...izon.com, torvalds@...ux-foundation.org
Cc: linux-kernel@...r.kernel.org, tglx@...utronix.de
Subject: Re: [patch 2/7] lib/hashmod: Add modulo based hash mechanism
> Not doing it for 64-bit constants makes no sense if it just uses the
> trivial Booth's algorithm version.
AFAICT, gcc 5 *does* optimize 64-bit multiplies by constants.
Does the belief that it doesn't date back to some really old
version?
There's still a threshold where it just punts to the multiplier.
Some examples, x86-64 (gcc 6.0.1) and aarch64 (gcc 5.3.1).
Note the difference in the multiply-by-12345 routine.
return x*9;
mul9:
leaq (%rdi,%rdi,8), %rax
ret
mul9:
add x0, x0, x0, lsl 3
ret
return x*10;
mul10:
leaq (%rdi,%rdi,4), %rax
addq %rax, %rax
ret
mul10:
lsl x1, x0, 3
add x0, x1, x0, lsl 1
ret
return x*127;
mul127:
movq %rdi, %rax
salq $7, %rax
subq %rdi, %rax
ret
mul127:
lsl x1, x0, 7
sub x0, x1, x0
ret
return x*12345;
mul12345:
imulq $12345, %rdi, %rax
ret
mul12345:
lsl x1, x0, 3
sub x1, x1, x0
lsl x1, x1, 1
sub x1, x1, x0
lsl x1, x1, 3
sub x1, x1, x0
lsl x1, x1, 3
sub x0, x1, x0
lsl x1, x0, 4
sub x0, x1, x0
ret
uint64_t y = (x << 9) - (x << 3) + x;
return x + (x << 14) - (y << 3);
mul12345_manual:
movq %rdi, %rdx
salq $14, %rax
salq $9, %rdx
addq %rdi, %rax
addq %rdi, %rdx
salq $3, %rdi
subq %rdi, %rdx
salq $3, %rdx
subq %rdx, %rax
ret
mul12345_manual:
lsl x2, x0, 9
lsl x1, x0, 14
add x2, x2, x0
add x1, x1, x0
sub x0, x2, x0, lsl 3
sub x0, x1, x0, lsl 3
ret
return x*2654435769;
mul2654435769:
movl $2654435769, %eax
imulq %rdi, %rax
ret
mul2654435769:
mov x1, 31161
movk x1, 0x9e37, lsl 16
mul x0, x0, x1
ret
The problem with variant code paths like mul12345_manual is that the
infrastructure required to determine which to use is many times larger
than the code itself. :-(
Powered by blists - more mailing lists