netdev - Re: [PATCH net] lib: packing: fix shift wrapping in bit

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20221207130240.aoojta5n5enec72y@skbuf>
Date:   Wed, 7 Dec 2022 15:02:40 +0200
From:   Vladimir Oltean <olteanv@...il.com>
To:     Dan Carpenter <error27@...il.com>
Cc:     "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
        kernel-janitors@...r.kernel.org
Subject: Re: [PATCH net] lib: packing: fix shift wrapping in bit_reverse()

On Wed, Dec 07, 2022 at 02:22:54PM +0200, Vladimir Oltean wrote:
> Wait a second, I deliberately wrote the code without conditionals.
> Let me look at the code disassembly before and after the patch and see
> what they look like.

Before (make lib/packing.lst on arm64):

	for (i = 0; i < width; i++) {
  1c:	310004ec 	adds	w12, w7, #0x1
  20:	540001c0 	b.eq	58 <adjust_for_msb_right_quirk+0x58>  // b.none
  24:	52800004 	mov	w4, #0x0                   	// #0
		bit = (val & (1 << i)) != 0;
  28:	5280002b 	mov	w11, #0x1                   	// #1
  2c:	d503201f 	nop
  30:	1ac42166 	lsl	w6, w11, w4
		new_val |= (bit << (width - i - 1));
  34:	4b0400e9 	sub	w9, w7, w4
		bit = (val & (1 << i)) != 0;
  38:	93407cc6 	sxtw	x6, w6		/* We don't want sign extension */
  3c:	ea0a00df 	tst	x6, x10
  40:	1a9f07e6 	cset	w6, ne  // ne = any
	for (i = 0; i < width; i++) {
  44:	6b07009f 	cmp	w4, w7
  48:	11000484 	add	w4, w4, #0x1
		new_val |= (bit << (width - i - 1));
  4c:	1ac920c6 	lsl	w6, w6, w9
  50:	aa060108 	orr	x8, x8, x6
	for (i = 0; i < width; i++) {
  54:	54fffee1 	b.ne	30 <adjust_for_msb_right_quirk+0x30>  // b.any


Then this modified code:

static u64 bit_reverse(u64 val, unsigned int width)
{
	u64 new_val = 0;
	unsigned int i;

	for (i = 0; i < width; i++)
		if (val & BIT_ULL(i))
			new_val |= BIT_ULL(width - i - 1);

	return new_val;
}

compiles to this:

	for (i = 0; i < width; i++) {
  1c:	310004eb 	adds	w11, w7, #0x1
  20:	540001c0 	b.eq	58 <adjust_for_msb_right_quirk+0x58>  // b.none
  24:	52800005 	mov	w5, #0x0                   	// #0
			new_val |= BIT_ULL(width - i - 1);
  28:	d280002a 	mov	x10, #0x1                   	// #1
  2c:	14000002 	b	34 <adjust_for_msb_right_quirk+0x34>	/* this is an unconditional jump to "sub w4, w7, w5", skipping the assignment to w5 */
  30:	2a0803e5 	mov	w5, w8			/* this is only hit by the backwards jump b.ne 30 at the end */
  34:	4b0500e4 	sub	w4, w7, w5
		if (val & BIT_ULL(i))
  38:	9ac52528 	lsr	x8, x9, x5
			new_val |= BIT_ULL(width - i - 1);
  3c:	f240011f 	tst	x8, #0x1
	for (i = 0; i < width; i++) {
  40:	110004a8 	add	w8, w5, #0x1
			new_val |= BIT_ULL(width - i - 1);
  44:	9ac42144 	lsl	x4, x10, x4
  48:	aa0400c4 	orr	x4, x6, x4
  4c:	9a861086 	csel	x6, x4, x6, ne  // ne = any
	for (i = 0; i < width; i++) {
  50:	6b0700bf 	cmp	w5, w7
  54:	54fffee1 	b.ne	30 <adjust_for_msb_right_quirk+0x30>  // b.any

which indeed has no branch in the main loop (between 0x30 and 0x54),
because as I explained, the "b 34" doesn't count - it's not in the loop.

Additionally, this rewritten code which is considerably more obscure in C:

static u64 bit_reverse(u64 val, unsigned int width)
{
	u64 new_val = 0;
	unsigned int i;

	for (i = 0; i < width; i++)
		new_val |= !!(val & BIT_ULL(i)) ?
			   BIT_ULL(width - i - 1) : 0;

	return new_val;
}

compiles to the exact same assembly code as the variant with "if":

	for (i = 0; i < width; i++)
  1c:	310004eb 	adds	w11, w7, #0x1
  20:	540001c0 	b.eq	58 <adjust_for_msb_right_quirk+0x58>  // b.none
  24:	52800005 	mov	w5, #0x0                   	// #0
		new_val |= !!(val & BIT_ULL(i)) ?
  28:	d280002a 	mov	x10, #0x1                   	// #1
  2c:	14000002 	b	34 <adjust_for_msb_right_quirk+0x34>
  30:	2a0803e5 	mov	w5, w8
  34:	4b0500e4 	sub	w4, w7, w5
  38:	9ac52528 	lsr	x8, x9, x5
  3c:	f240011f 	tst	x8, #0x1
	for (i = 0; i < width; i++)
  40:	110004a8 	add	w8, w5, #0x1
		new_val |= !!(val & BIT_ULL(i)) ?
  44:	9ac42144 	lsl	x4, x10, x4
  48:	aa0400c4 	orr	x4, x6, x4
  4c:	9a861086 	csel	x6, x4, x6, ne  // ne = any
	for (i = 0; i < width; i++)
  50:	6b0700bf 	cmp	w5, w7
  54:	54fffee1 	b.ne	30 <adjust_for_msb_right_quirk+0x30>  // b.any

So this is good with the "if".