lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 8 Aug 2016 18:12:16 +0200 (CEST)
From:	Jiri Kosina <jikos@...nel.org>
To:	Peter Zijlstra <peterz@...radead.org>
cc:	Ingo Molnar <mingo@...hat.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	x86@...nel.org, linux-kernel@...r.kernel.org,
	Mike Galbraith <umgwanakikbuti@...il.com>,
	Borislav Petkov <bp@...e.de>
Subject: [PATCH v2] x86/hweight: fix open-coded versions of 32bit and 64bit
 hweight calculations

From: Jiri Kosina <jkosina@...e.cz>

The open-coded versions of __sw_hweight32() and __sw_hweight64() are 
broken; the variable <-> register mapping in the comments doesn't match 
the registers being used, resulting in wrong calculations.

This has a potential to demonstrate itself by various symptoms on machines 
where this is actually being used to compute the hweight (due to lack of 
popcnt insn). On my core2 system, this demonstrates itself as

 NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
 unchecked MSR access error: WRMSR to 0xdf (tried to write 0x000000ff80000001) at rIP: 0xffffffff90004acc (x86_perf_event_set_period+0xdc/0x190)
  ffff8e39f941b000 ffff8e39fbc0a440 000000000000001e ffff8e39fbc0a660
  ffff8e39f9523b78 ffffffff90004bd0 ffff8e39f941b000 ffff8e39fbc0a760
  ffff8e39fbc0a440 ffff8e39f9523bc0 ffffffff900053ef 00000000f951c2c0
 Call Trace:
  [<ffffffff90004bd0>] x86_pmu_start+0x50/0x110
  [<ffffffff900053ef>] x86_pmu_enable+0x27f/0x2f0
  [<ffffffff90175642>] perf_pmu_enable+0x22/0x30
  [<ffffffff901766b1>] ctx_resched+0x51/0x60
  [<ffffffff901768a0>] __perf_event_enable+0x1e0/0x240
  [<ffffffff9016e5f9>] event_function+0xa9/0x180
  [<ffffffff901766c0>] ? ctx_resched+0x60/0x60
  [<ffffffff9016fcef>] remote_function+0x3f/0x50
  [<ffffffff901012b6>] generic_exec_single+0xb6/0x140
  [<ffffffff9016fcb0>] ? perf_cgroup_attach+0x50/0x50
  [<ffffffff9016fcb0>] ? perf_cgroup_attach+0x50/0x50
  [<ffffffff901013f7>] smp_call_function_single+0xb7/0x110
  [<ffffffff9016e984>] cpu_function_call+0x34/0x40
  [<ffffffff9016e550>] ? list_del_event+0x150/0x150
  [<ffffffff9016ecda>] event_function_call+0x11a/0x120
  [<ffffffff901766c0>] ? ctx_resched+0x60/0x60
  [<ffffffff9016ed79>] _perf_event_enable+0x49/0x70
  [<ffffffff901736ac>] perf_event_enable+0x1c/0x40
  [<ffffffff9013cad2>] watchdog_enable+0x132/0x1d0
  [<ffffffff90092440>] smpboot_thread_fn+0xe0/0x1d0
  [<ffffffff90092360>] ? sort_range+0x30/0x30
  [<ffffffff9008e7e2>] kthread+0xf2/0x110
  [<ffffffff9069e611>] ? wait_for_completion+0xe1/0x110
  [<ffffffff906a3b2f>] ret_from_fork+0x1f/0x40
  [<ffffffff9008e6f0>] ? kthread_create_on_node+0x220/0x220

as FIXED_EVENT_CONSTRAINT() doesn't produce correct results, and followup 
panic in x86_pmu_stop(). YMMV.

Fix this by rewriting the code so that it actually matches the math in 
lib/hweight.c. While at it, nuke the original comments altogether; the 
"variable" names don't really match what's being used in the C-version of 
the function, and I find them to be more misleading than helping.

This version of gcc:

	gcc (SUSE Linux) 4.8.5

produces identical assembly code from lib/hweight.c (modulo frame pointer 
maths).

Fixes: f5967101e9d ("x86/hweight: Get rid of the special calling convention")
Signed-off-by: Jiri Kosina <jkosina@...e.cz>
---

v1 -> v2: remove the comments altogether; put "approved by gcc" note into 
          changelog

 arch/x86/lib/hweight.S | 69 +++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d7..e1cea44f 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -8,56 +8,55 @@
  */
 ENTRY(__sw_hweight32)
 
-#ifdef CONFIG_X86_64
-	movl %edi, %eax				# w
-#endif
 	__ASM_SIZE(push,) %__ASM_REG(dx)
-	movl %eax, %edx				# w -> t
-	shrl %edx				# t >>= 1
-	andl $0x55555555, %edx			# t &= 0x55555555
-	subl %edx, %eax				# w -= t
 
-	movl %eax, %edx				# w -> t
-	shrl $2, %eax				# w_tmp >>= 2
-	andl $0x33333333, %edx			# t	&= 0x33333333
-	andl $0x33333333, %eax			# w_tmp &= 0x33333333
-	addl %edx, %eax				# w = w_tmp + t
+	movl %edi, %edx
+
+	shrl %edx
+	andl $0x55555555, %edx
+	subl %edx, %edi
+	movl %edi, %eax
+
+	shrl $2, %edi
+	andl $0x33333333, %eax
+	andl $0x33333333, %edi
+	addl %eax, %edi
 
-	movl %eax, %edx				# w -> t
-	shrl $4, %edx				# t >>= 4
-	addl %edx, %eax				# w_tmp += t
-	andl  $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
-	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
-	shrl $24, %eax				# w = w_tmp >> 24
+	movl %edi, %eax
+	shrl $4, %eax
+	addl %edi, %eax
+	andl  $0x0f0f0f0f, %eax
+	imull $0x01010101, %eax, %eax
+	shrl $24, %eax
 	__ASM_SIZE(pop,) %__ASM_REG(dx)
 	ret
 ENDPROC(__sw_hweight32)
 
 ENTRY(__sw_hweight64)
-#ifdef CONFIG_X86_64
+
 	pushq   %rdx
 
-	movq    %rdi, %rdx                      # w -> t
+	movq    %rdi, %rdx
 	movabsq $0x5555555555555555, %rax
-	shrq    %rdx                            # t >>= 1
-	andq    %rdx, %rax                      # t &= 0x5555555555555555
+	shrq    %rdx
+	andq    %rax, %rdx
+	subq    %rdx, %rdi
 	movabsq $0x3333333333333333, %rdx
-	subq    %rax, %rdi                      # w -= t
-
-	movq    %rdi, %rax                      # w -> t
-	shrq    $2, %rdi                        # w_tmp >>= 2
-	andq    %rdx, %rax                      # t     &= 0x3333333333333333
-	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
-	addq    %rdx, %rax                      # w = w_tmp + t
 
-	movq    %rax, %rdx                      # w -> t
-	shrq    $4, %rdx                        # t >>= 4
-	addq    %rdx, %rax                      # w_tmp += t
+	movq    %rdi, %rax
+	shrq    $2, %rdi
+	andq    %rdx, %rax
+	andq    %rdx, %rdi
 	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
-	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
+	addq    %rax, %rdi
+
+	movq    %rdi, %rax
+	shrq    $4, %rax
+	addq    %rdi, %rax
+	andq    %rdx, %rax
 	movabsq $0x0101010101010101, %rdx
-	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
-	shrq    $56, %rax                       # w = w_tmp >> 56
+	imulq   %rdx, %rax
+	shrq    $56, %rax
 
 	popq    %rdx
 	ret
-- 
Jiri Kosina
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ