[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221207130240.aoojta5n5enec72y@skbuf>
Date: Wed, 7 Dec 2022 15:02:40 +0200
From: Vladimir Oltean <olteanv@...il.com>
To: Dan Carpenter <error27@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
kernel-janitors@...r.kernel.org
Subject: Re: [PATCH net] lib: packing: fix shift wrapping in bit_reverse()
On Wed, Dec 07, 2022 at 02:22:54PM +0200, Vladimir Oltean wrote:
> Wait a second, I deliberately wrote the code without conditionals.
> Let me look at the code disassembly before and after the patch and see
> what they look like.
Before (make lib/packing.lst on arm64):
for (i = 0; i < width; i++) {
1c: 310004ec adds w12, w7, #0x1
20: 540001c0 b.eq 58 <adjust_for_msb_right_quirk+0x58> // b.none
24: 52800004 mov w4, #0x0 // #0
bit = (val & (1 << i)) != 0;
28: 5280002b mov w11, #0x1 // #1
2c: d503201f nop
30: 1ac42166 lsl w6, w11, w4
new_val |= (bit << (width - i - 1));
34: 4b0400e9 sub w9, w7, w4
bit = (val & (1 << i)) != 0;
38: 93407cc6 sxtw x6, w6 /* We don't want sign extension */
3c: ea0a00df tst x6, x10
40: 1a9f07e6 cset w6, ne // ne = any
for (i = 0; i < width; i++) {
44: 6b07009f cmp w4, w7
48: 11000484 add w4, w4, #0x1
new_val |= (bit << (width - i - 1));
4c: 1ac920c6 lsl w6, w6, w9
50: aa060108 orr x8, x8, x6
for (i = 0; i < width; i++) {
54: 54fffee1 b.ne 30 <adjust_for_msb_right_quirk+0x30> // b.any
Then this modified code:
/* Mirror the low @width bits of @val: bit 0 swaps with bit (width - 1). */
static u64 bit_reverse(u64 val, unsigned int width)
{
	u64 reversed = 0;
	unsigned int pos;

	for (pos = 0; pos < width; pos++) {
		/* BIT_ULL keeps the shift in 64-bit, avoiding UB for pos >= 32 */
		if (val & BIT_ULL(pos))
			reversed |= BIT_ULL(width - pos - 1);
	}

	return reversed;
}
compiles to this:
for (i = 0; i < width; i++) {
1c: 310004eb adds w11, w7, #0x1
20: 540001c0 b.eq 58 <adjust_for_msb_right_quirk+0x58> // b.none
24: 52800005 mov w5, #0x0 // #0
new_val |= BIT_ULL(width - i - 1);
28: d280002a mov x10, #0x1 // #1
2c: 14000002 b 34 <adjust_for_msb_right_quirk+0x34> /* this is an unconditional jump to "sub w4, w7, w5", skipping the assignment to w5 */
30: 2a0803e5 mov w5, w8 /* this is only hit by the backwards jump b.ne 30 at the end */
34: 4b0500e4 sub w4, w7, w5
if (val & BIT_ULL(i))
38: 9ac52528 lsr x8, x9, x5
new_val |= BIT_ULL(width - i - 1);
3c: f240011f tst x8, #0x1
for (i = 0; i < width; i++) {
40: 110004a8 add w8, w5, #0x1
new_val |= BIT_ULL(width - i - 1);
44: 9ac42144 lsl x4, x10, x4
48: aa0400c4 orr x4, x6, x4
4c: 9a861086 csel x6, x4, x6, ne // ne = any
for (i = 0; i < width; i++) {
50: 6b0700bf cmp w5, w7
54: 54fffee1 b.ne 30 <adjust_for_msb_right_quirk+0x30> // b.any
which indeed has no branch in the main loop (between 0x30 and 0x54),
because as I explained, the "b 34" doesn't count - it's not in the loop.
Additionally, this rewritten code, which is considerably more obscure in C:
/* Mirror the low @width bits of @val: bit 0 swaps with bit (width - 1). */
static u64 bit_reverse(u64 val, unsigned int width)
{
	u64 reversed = 0;
	unsigned int pos;

	/* Branchless form: OR in either the mirrored bit or zero. */
	for (pos = 0; pos < width; pos++)
		reversed |= (val & BIT_ULL(pos)) ?
			    BIT_ULL(width - pos - 1) : 0;

	return reversed;
}
compiles to the exact same assembly code as the variant with "if":
for (i = 0; i < width; i++)
1c: 310004eb adds w11, w7, #0x1
20: 540001c0 b.eq 58 <adjust_for_msb_right_quirk+0x58> // b.none
24: 52800005 mov w5, #0x0 // #0
new_val |= !!(val & BIT_ULL(i)) ?
28: d280002a mov x10, #0x1 // #1
2c: 14000002 b 34 <adjust_for_msb_right_quirk+0x34>
30: 2a0803e5 mov w5, w8
34: 4b0500e4 sub w4, w7, w5
38: 9ac52528 lsr x8, x9, x5
3c: f240011f tst x8, #0x1
for (i = 0; i < width; i++)
40: 110004a8 add w8, w5, #0x1
new_val |= !!(val & BIT_ULL(i)) ?
44: 9ac42144 lsl x4, x10, x4
48: aa0400c4 orr x4, x6, x4
4c: 9a861086 csel x6, x4, x6, ne // ne = any
for (i = 0; i < width; i++)
50: 6b0700bf cmp w5, w7
54: 54fffee1 b.ne 30 <adjust_for_msb_right_quirk+0x30> // b.any
So this is good with the "if".
Powered by blists - more mailing lists