lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 8 Aug 2016 16:41:46 +0200 (CEST)
From:	Jiri Kosina <jikos@...nel.org>
To:	Peter Zijlstra <peterz@...radead.org>
cc:	Ingo Molnar <mingo@...hat.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	x86@...nel.org, linux-kernel@...r.kernel.org,
	Mike Galbraith <umgwanakikbuti@...il.com>,
	Borislav Petkov <bp@...e.de>
Subject: Re: x86 PMU broken in current Linus' tree

On Mon, 8 Aug 2016, Peter Zijlstra wrote:

> So I can reproduce on my Lenovo T500 which has a Core2 as well. By long
> and tedious printk() it looks like the event constraint:
> 
>   FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */

[ adding Boris to CC ]

This made me wonder, and it turned out that HWEIGHT() doesn't seem to be 
doing the right thing on machines that don't have popcnt instruction 
(which is probably the distinguishing factor here).

How about the patch below? It fixes the symptoms on my system.




From: Jiri Kosina <jkosina@...e.cz>
Subject: [PATCH] x86/hweight: fix open-coded versions of 32bit and 64bit hweight calculations

The open-coded versions of __sw_hweight32() and __sw_hweight64() are broken;
the variable <-> register mapping in the comments doesn't match the registers
being used, resulting in wrong calculations.

This has a potential to demonstrate itself by various symptoms on machines
where this is actually being used to compute the hweight (due to lack of
popcnt insn). On my core2 system, this demonstrates itself as

 NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
 unchecked MSR access error: WRMSR to 0xdf (tried to write 0x000000ff80000001) at rIP: 0xffffffff90004acc
t_set_period+0xdc/0x190)
  ffff8e39f941b000 ffff8e39fbc0a440 000000000000001e ffff8e39fbc0a660
  ffff8e39f9523b78 ffffffff90004bd0 ffff8e39f941b000 ffff8e39fbc0a760
  ffff8e39fbc0a440 ffff8e39f9523bc0 ffffffff900053ef 00000000f951c2c0
 Call Trace:
  [<ffffffff90004bd0>] x86_pmu_start+0x50/0x110
  [<ffffffff900053ef>] x86_pmu_enable+0x27f/0x2f0
  [<ffffffff90175642>] perf_pmu_enable+0x22/0x30
  [<ffffffff901766b1>] ctx_resched+0x51/0x60
  [<ffffffff901768a0>] __perf_event_enable+0x1e0/0x240
  [<ffffffff9016e5f9>] event_function+0xa9/0x180
  [<ffffffff901766c0>] ? ctx_resched+0x60/0x60
  [<ffffffff9016fcef>] remote_function+0x3f/0x50
  [<ffffffff901012b6>] generic_exec_single+0xb6/0x140
  [<ffffffff9016fcb0>] ? perf_cgroup_attach+0x50/0x50
  [<ffffffff9016fcb0>] ? perf_cgroup_attach+0x50/0x50
  [<ffffffff901013f7>] smp_call_function_single+0xb7/0x110
  [<ffffffff9016e984>] cpu_function_call+0x34/0x40
  [<ffffffff9016e550>] ? list_del_event+0x150/0x150
  [<ffffffff9016ecda>] event_function_call+0x11a/0x120
  [<ffffffff901766c0>] ? ctx_resched+0x60/0x60
  [<ffffffff9016ed79>] _perf_event_enable+0x49/0x70
  [<ffffffff901736ac>] perf_event_enable+0x1c/0x40
  [<ffffffff9013cad2>] watchdog_enable+0x132/0x1d0
  [<ffffffff90092440>] smpboot_thread_fn+0xe0/0x1d0
  [<ffffffff90092360>] ? sort_range+0x30/0x30
  [<ffffffff9008e7e2>] kthread+0xf2/0x110
  [<ffffffff9069e611>] ? wait_for_completion+0xe1/0x110
  [<ffffffff906a3b2f>] ret_from_fork+0x1f/0x40
  [<ffffffff9008e6f0>] ? kthread_create_on_node+0x220/0x220

as FIXED_EVENT_CONSTRAINT() doesn't produce correct results, and a follow-up panic
in x86_pmu_stop(). YMMV.

Fix this by rewriting the code so that it actually matches the math in
comments.

Fixes: f5967101e9d ("x86/hweight: Get rid of the special calling convention")
Signed-off-by: Jiri Kosina <jkosina@...e.cz>
---
 arch/x86/lib/hweight.S | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d7..9d0540f 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -8,24 +8,23 @@
  */
 ENTRY(__sw_hweight32)
 
+	__ASM_SIZE(push,) %__ASM_REG(dx)
 #ifdef CONFIG_X86_64
-	movl %edi, %eax				# w
+	movl %edi, %edx				# w -> t
 #endif
-	__ASM_SIZE(push,) %__ASM_REG(dx)
-	movl %eax, %edx				# w -> t
-	shrl %edx				# t >>= 1
-	andl $0x55555555, %edx			# t &= 0x55555555
-	subl %edx, %eax				# w -= t
+	shrl %edx				# w >>= 1
+	andl $0x55555555, %edx			# w &= 0x55555555
+	subl %edx, %edi				# w -= t
+	movl %edi, %eax				# w -> t
 
-	movl %eax, %edx				# w -> t
-	shrl $2, %eax				# w_tmp >>= 2
-	andl $0x33333333, %edx			# t	&= 0x33333333
-	andl $0x33333333, %eax			# w_tmp &= 0x33333333
-	addl %edx, %eax				# w = w_tmp + t
+	shrl $2, %edi				# w_tmp >>= 2
+	andl $0x33333333, %eax			# t	&= 0x33333333
+	andl $0x33333333, %edi			# w_tmp &= 0x33333333
+	addl %eax, %edi				# w = w_tmp + t
 
-	movl %eax, %edx				# w -> t
-	shrl $4, %edx				# t >>= 4
-	addl %edx, %eax				# w_tmp += t
+	movl %edi, %eax				# w -> t
+	shrl $4, %eax				# t >>= 4
+	addl %edi, %eax				# w_tmp += t
 	andl  $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
 	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
 	shrl $24, %eax				# w = w_tmp >> 24
@@ -40,20 +39,20 @@ ENTRY(__sw_hweight64)
 	movq    %rdi, %rdx                      # w -> t
 	movabsq $0x5555555555555555, %rax
 	shrq    %rdx                            # t >>= 1
-	andq    %rdx, %rax                      # t &= 0x5555555555555555
+	andq    %rax, %rdx                      # t &= 0x5555555555555555
+	subq    %rdx, %rdi                      # w -= t
 	movabsq $0x3333333333333333, %rdx
-	subq    %rax, %rdi                      # w -= t
 
 	movq    %rdi, %rax                      # w -> t
 	shrq    $2, %rdi                        # w_tmp >>= 2
 	andq    %rdx, %rax                      # t     &= 0x3333333333333333
-	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
-	addq    %rdx, %rax                      # w = w_tmp + t
-
-	movq    %rax, %rdx                      # w -> t
-	shrq    $4, %rdx                        # t >>= 4
-	addq    %rdx, %rax                      # w_tmp += t
+	andq    %rdx, %rdi                      # w_tmp &= 0x3333333333333333
 	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
+	addq    %rax, %rdi                      # w = w_tmp + t
+
+	movq    %rdi, %rax                      # w -> t
+	shrq    $4, %rax                        # t >>= 4
+	addq    %rdi, %rax                      # w_tmp += t
 	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
 	movabsq $0x0101010101010101, %rdx
 	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101

-- 
Jiri Kosina
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ