lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <174015038309.10177.3818298325558179.tip-bot2@tip-bot2>
Date: Fri, 21 Feb 2025 15:06:23 -0000
From: "tip-bot2 for Uros Bizjak" <tip-bot2@...utronix.de>
To: linux-tip-commits@...r.kernel.org
Cc: Uros Bizjak <ubizjak@...il.com>, Ingo Molnar <mingo@...nel.org>,
 Linus Torvalds <torvalds@...ux-foundation.org>, x86@...nel.org,
 linux-kernel@...r.kernel.org
Subject: [tip: locking/core] x86/locking: Use asm_inline for
 {,try_}cmpxchg{64,128} emulations

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     2d352ec9fcb5d965318e7855b2406a7a14e9ae13
Gitweb:        https://git.kernel.org/tip/2d352ec9fcb5d965318e7855b2406a7a14e9ae13
Author:        Uros Bizjak <ubizjak@...il.com>
AuthorDate:    Fri, 14 Feb 2025 16:07:47 +01:00
Committer:     Ingo Molnar <mingo@...nel.org>
CommitterDate: Fri, 21 Feb 2025 15:56:08 +01:00

x86/locking: Use asm_inline for {,try_}cmpxchg{64,128} emulations

According to:

  https://gcc.gnu.org/onlinedocs/gcc/Size-of-an-asm.html

the usage of asm pseudo directives in the asm template can confuse
the compiler to wrongly estimate the size of the generated
code.

The ALTERNATIVE macro expands to several asm pseudo directives,
so its usage in {,try_}cmpxchg{64,128} causes instruction length estimate
to fail by an order of magnitude (the specially instrumented compiler
reports the estimated length of these asm templates to be more than 20
instructions long).

This incorrect estimate further causes unoptimal inlining
decisions, unoptimal instruction scheduling and unoptimal code block
alignments for functions that use these locking primitives.

Use asm_inline instead:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-December/512349.html

which is a feature that makes GCC pretend some inline assembler code
is tiny (while it would think it is huge), instead of just asm.

For code size estimation, the size of the asm is then taken as
the minimum size of one instruction, ignoring how many instructions
compiler thinks it is.

The effect of this patch on x86_64 target is minor, since 128-bit
functions are rarely used on this target. The code size of the resulting
defconfig object file stays the same:

      text       data     bss      dec         hex filename
  27456612    4638523  814148 32909283     1f627e3 vmlinux-old.o
  27456612    4638523  814148 32909283     1f627e3 vmlinux-new.o

but the patch has minor effect on code layout due to the different
scheduling decisions in functions containing changed macros.

There is no effect on the x64_32 target, the code size of the resulting
defconfig object file and the code layout stays the same:

      text       data     bss      dec         hex filename
  18883870    2679275 1707916 23271061     1631695 vmlinux-old.o
  18883870    2679275 1707916 23271061     1631695 vmlinux-new.o

Signed-off-by: Uros Bizjak <ubizjak@...il.com>
Signed-off-by: Ingo Molnar <mingo@...nel.org>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Link: https://lore.kernel.org/r/20250214150929.5780-2-ubizjak@gmail.com
---
 arch/x86/include/asm/cmpxchg_32.h | 32 ++++++------
 arch/x86/include/asm/percpu.h     | 77 ++++++++++++++----------------
 2 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fd1282a..95b5f99 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -91,12 +91,14 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, 
 	union __u64_halves o = { .full = (_old), },			\
 			   n = { .full = (_new), };			\
 									\
-	asm volatile(ALTERNATIVE(_lock_loc				\
-				 "call cmpxchg8b_emu",			\
-				 _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
-		     : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high))	\
-		     : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr)	\
-		     : "memory");					\
+	asm_inline volatile(						\
+		ALTERNATIVE(_lock_loc					\
+			    "call cmpxchg8b_emu",			\
+			    _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8)	\
+		: ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high))		\
+		: "b" (n.low), "c" (n.high),				\
+		  [ptr] "S" (_ptr)					\
+		: "memory");						\
 									\
 	o.full;								\
 })
@@ -119,14 +121,16 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 
 			   n = { .full = (_new), };			\
 	bool ret;							\
 									\
-	asm volatile(ALTERNATIVE(_lock_loc				\
-				 "call cmpxchg8b_emu",			\
-				 _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
-		     CC_SET(e)						\
-		     : ALT_OUTPUT_SP(CC_OUT(e) (ret),			\
-				     "+a" (o.low), "+d" (o.high))	\
-		     : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr)	\
-		     : "memory");					\
+	asm_inline volatile(						\
+		ALTERNATIVE(_lock_loc					\
+			    "call cmpxchg8b_emu",			\
+			    _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+		CC_SET(e)						\
+		: ALT_OUTPUT_SP(CC_OUT(e) (ret),			\
+				"+a" (o.low), "+d" (o.high))		\
+		: "b" (n.low), "c" (n.high),				\
+		  [ptr] "S" (_ptr)					\
+		: "memory");						\
 									\
 	if (unlikely(!ret))						\
 		*(_oldp) = o.full;					\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0ab991f..08f5f61 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -348,15 +348,14 @@ do {									\
 	old__.var = _oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",		\
-			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
-		  : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),	\
-				  "+a" (old__.low),			\
-				  "+d" (old__.high))			\
-		  : "b" (new__.low),					\
-		    "c" (new__.high),					\
-		    "S" (&(_var))					\
-		  : "memory");						\
+	asm_inline qual (						\
+		ALTERNATIVE("call this_cpu_cmpxchg8b_emu",		\
+			    "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+		: ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),	\
+				"+a" (old__.low), "+d" (old__.high))	\
+		: "b" (new__.low), "c" (new__.high),			\
+		  "S" (&(_var))						\
+		: "memory");						\
 									\
 	old__.var;							\
 })
@@ -378,17 +377,16 @@ do {									\
 	old__.var = *_oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",		\
-			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
-		  CC_SET(z)						\
-		  : ALT_OUTPUT_SP(CC_OUT(z) (success),			\
-				  [var] "+m" (__my_cpu_var(_var)),	\
-				  "+a" (old__.low),			\
-				  "+d" (old__.high))			\
-		  : "b" (new__.low),					\
-		    "c" (new__.high),					\
-		    "S" (&(_var))					\
-		  : "memory");						\
+	asm_inline qual (						\
+		ALTERNATIVE("call this_cpu_cmpxchg8b_emu",		\
+			    "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+		CC_SET(z)						\
+		: ALT_OUTPUT_SP(CC_OUT(z) (success),			\
+				[var] "+m" (__my_cpu_var(_var)),	\
+				"+a" (old__.low), "+d" (old__.high))	\
+		: "b" (new__.low), "c" (new__.high),			\
+		  "S" (&(_var))						\
+		: "memory");						\
 	if (unlikely(!success))						\
 		*_oval = old__.var;					\
 									\
@@ -419,15 +417,14 @@ do {									\
 	old__.var = _oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",		\
-			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
-		  : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),	\
-				  "+a" (old__.low),			\
-				  "+d" (old__.high))			\
-		  : "b" (new__.low),					\
-		    "c" (new__.high),					\
-		    "S" (&(_var))					\
-		  : "memory");						\
+	asm_inline qual (						\
+		ALTERNATIVE("call this_cpu_cmpxchg16b_emu",		\
+			    "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+		: ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),	\
+				"+a" (old__.low), "+d" (old__.high))	\
+		: "b" (new__.low), "c" (new__.high),			\
+		  "S" (&(_var))						\
+		: "memory");						\
 									\
 	old__.var;							\
 })
@@ -449,19 +446,19 @@ do {									\
 	old__.var = *_oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",		\
-			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
-		  CC_SET(z)						\
-		  : ALT_OUTPUT_SP(CC_OUT(z) (success),			\
-				  [var] "+m" (__my_cpu_var(_var)),	\
-				  "+a" (old__.low),			\
-				  "+d" (old__.high))			\
-		  : "b" (new__.low),					\
-		    "c" (new__.high),					\
-		    "S" (&(_var))					\
-		  : "memory");						\
+	asm_inline qual (						\
+		ALTERNATIVE("call this_cpu_cmpxchg16b_emu",		\
+			    "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+		CC_SET(z)						\
+		: ALT_OUTPUT_SP(CC_OUT(z) (success),			\
+				[var] "+m" (__my_cpu_var(_var)),	\
+				"+a" (old__.low), "+d" (old__.high))	\
+		: "b" (new__.low), "c" (new__.high),			\
+		  "S" (&(_var))						\
+		: "memory");						\
 	if (unlikely(!success))						\
 		*_oval = old__.var;					\
+									\
 	likely(success);						\
 })
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ