linux-kernel - [118/205] ARM: 6212/1: atomic ops: add memory constraints to inline asm

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20100730175144.669450417@clark.site>
Date:	Fri, 30 Jul 2010 10:52:15 -0700
From:	Greg KH <gregkh@...e.de>
To:	linux-kernel@...r.kernel.org, stable@...nel.org
Cc:	stable-review@...nel.org, torvalds@...ux-foundation.org,
	akpm@...ux-foundation.org, alan@...rguk.ukuu.org.uk,
	Will Deacon <will.deacon@....com>,
	Russell King <rmk+kernel@....linux.org.uk>
Subject: [118/205] ARM: 6212/1: atomic ops: add memory constraints to inline asm

2.6.34-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Will Deacon <will.deacon@....com>

commit 398aa66827155ef52bab58bebd24597d90968929 upstream.

Currently, the 32-bit and 64-bit atomic operations on ARM do not
include memory constraints in the inline assembly blocks. In the
case of barrier-less operations [for example, atomic_add], this
means that the compiler may constant fold values which have actually
been modified by a call to an atomic operation.

This issue can be observed in the atomic64_test routine in
<kernel root>/lib/atomic64_test.c:

00000000 <test_atomic64>:
   0:	e1a0c00d 	mov	ip, sp
   4:	e92dd830 	push	{r4, r5, fp, ip, lr, pc}
   8:	e24cb004 	sub	fp, ip, #4
   c:	e24dd008 	sub	sp, sp, #8
  10:	e24b3014 	sub	r3, fp, #20
  14:	e30d000d 	movw	r0, #53261	; 0xd00d
  18:	e3011337 	movw	r1, #4919	; 0x1337
  1c:	e34c0001 	movt	r0, #49153	; 0xc001
  20:	e34a1aa3 	movt	r1, #43683	; 0xaaa3
  24:	e16300f8 	strd	r0, [r3, #-8]!
  28:	e30c0afe 	movw	r0, #51966	; 0xcafe
  2c:	e30b1eef 	movw	r1, #48879	; 0xbeef
  30:	e34d0eaf 	movt	r0, #57007	; 0xdeaf
  34:	e34d1ead 	movt	r1, #57005	; 0xdead
  38:	e1b34f9f 	ldrexd	r4, [r3]
  3c:	e1a34f90 	strexd	r4, r0, [r3]
  40:	e3340000 	teq	r4, #0
  44:	1afffffb 	bne	38 <test_atomic64+0x38>
  48:	e59f0004 	ldr	r0, [pc, #4]	; 54 <test_atomic64+0x54>
  4c:	e3a0101e 	mov	r1, #30
  50:	ebfffffe 	bl	0 <__bug>
  54:	00000000 	.word	0x00000000

The atomic64_set (0x38-0x44) writes to the atomic64_t, but the
compiler doesn't see this, assumes the test condition is always
false and generates an unconditional branch to __bug. The rest of the
test is optimised away.

This patch adds suitable memory constraints to the atomic operations on ARM
to ensure that the compiler is informed of the correct data hazards. We have
to use the "Qo" constraints to avoid hitting the GCC anomaly described at
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44492 , where the compiler
makes assumptions about the writeback in the addressing mode used by the
inline assembly. These constraints forbid the use of auto{inc,dec} addressing
modes, so it doesn't matter if we don't use the operand exactly once.

Reviewed-by: Nicolas Pitre <nicolas.pitre@...aro.org>
Signed-off-by: Will Deacon <will.deacon@....com>
Signed-off-by: Russell King <rmk+kernel@....linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@...e.de>

---
 arch/arm/include/asm/atomic.h |  132 +++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 66 deletions(-)

--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -40,12 +40,12 @@ static inline void atomic_add(int i, ato
 	int result;
 
 	__asm__ __volatile__("@ atomic_add\n"
-"1:	ldrex	%0, [%2]\n"
-"	add	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 }
@@ -58,12 +58,12 @@ static inline int atomic_add_return(int
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic_add_return\n"
-"1:	ldrex	%0, [%2]\n"
-"	add	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
@@ -78,12 +78,12 @@ static inline void atomic_sub(int i, ato
 	int result;
 
 	__asm__ __volatile__("@ atomic_sub\n"
-"1:	ldrex	%0, [%2]\n"
-"	sub	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	sub	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 }
@@ -96,12 +96,12 @@ static inline int atomic_sub_return(int
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
-"1:	ldrex	%0, [%2]\n"
-"	sub	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	sub	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
@@ -118,11 +118,11 @@ static inline int atomic_cmpxchg(atomic_
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
-		"ldrex	%1, [%2]\n"
+		"ldrex	%1, [%3]\n"
 		"mov	%0, #0\n"
-		"teq	%1, %3\n"
-		"strexeq %0, %4, [%2]\n"
-		    : "=&r" (res), "=&r" (oldval)
+		"teq	%1, %4\n"
+		"strexeq %0, %5, [%3]\n"
+		    : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
 		    : "r" (&ptr->counter), "Ir" (old), "r" (new)
 		    : "cc");
 	} while (res);
@@ -137,12 +137,12 @@ static inline void atomic_clear_mask(uns
 	unsigned long tmp, tmp2;
 
 	__asm__ __volatile__("@ atomic_clear_mask\n"
-"1:	ldrex	%0, [%2]\n"
-"	bic	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	bic	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (tmp), "=&r" (tmp2)
+	: "=&r" (tmp), "=&r" (tmp2), "+Qo" (*addr)
 	: "r" (addr), "Ir" (mask)
 	: "cc");
 }
@@ -249,7 +249,7 @@ static inline u64 atomic64_read(atomic64
 	__asm__ __volatile__("@ atomic64_read\n"
 "	ldrexd	%0, %H0, [%1]"
 	: "=&r" (result)
-	: "r" (&v->counter)
+	: "r" (&v->counter), "Qo" (v->counter)
 	);
 
 	return result;
@@ -260,11 +260,11 @@ static inline void atomic64_set(atomic64
 	u64 tmp;
 
 	__asm__ __volatile__("@ atomic64_set\n"
-"1:	ldrexd	%0, %H0, [%1]\n"
-"	strexd	%0, %2, %H2, [%1]\n"
+"1:	ldrexd	%0, %H0, [%2]\n"
+"	strexd	%0, %3, %H3, [%2]\n"
 "	teq	%0, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
+	: "=&r" (tmp), "=Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -275,13 +275,13 @@ static inline void atomic64_add(u64 i, a
 	unsigned long tmp;
 
 	__asm__ __volatile__("@ atomic64_add\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	adds	%0, %0, %3\n"
-"	adc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	adds	%0, %0, %4\n"
+"	adc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -294,13 +294,13 @@ static inline u64 atomic64_add_return(u6
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	adds	%0, %0, %3\n"
-"	adc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	adds	%0, %0, %4\n"
+"	adc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 
@@ -315,13 +315,13 @@ static inline void atomic64_sub(u64 i, a
 	unsigned long tmp;
 
 	__asm__ __volatile__("@ atomic64_sub\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	subs	%0, %0, %3\n"
-"	sbc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	subs	%0, %0, %4\n"
+"	sbc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -334,13 +334,13 @@ static inline u64 atomic64_sub_return(u6
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	subs	%0, %0, %3\n"
-"	sbc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	subs	%0, %0, %4\n"
+"	sbc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 
@@ -358,12 +358,12 @@ static inline u64 atomic64_cmpxchg(atomi
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
-		"ldrexd		%1, %H1, [%2]\n"
+		"ldrexd		%1, %H1, [%3]\n"
 		"mov		%0, #0\n"
-		"teq		%1, %3\n"
-		"teqeq		%H1, %H3\n"
-		"strexdeq	%0, %4, %H4, [%2]"
-		: "=&r" (res), "=&r" (oldval)
+		"teq		%1, %4\n"
+		"teqeq		%H1, %H4\n"
+		"strexdeq	%0, %5, %H5, [%3]"
+		: "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
 		: "r" (&ptr->counter), "r" (old), "r" (new)
 		: "cc");
 	} while (res);
@@ -381,11 +381,11 @@ static inline u64 atomic64_xchg(atomic64
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	strexd	%1, %3, %H3, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	strexd	%1, %4, %H4, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (ptr->counter)
 	: "r" (&ptr->counter), "r" (new)
 	: "cc");
 
@@ -402,16 +402,16 @@ static inline u64 atomic64_dec_if_positi
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, #1\n"
 "	sbc	%H0, %H0, #0\n"
 "	teq	%H0, #0\n"
 "	bmi	2f\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b\n"
 "2:"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter)
 	: "cc");
 
@@ -429,18 +429,18 @@ static inline int atomic64_add_unless(at
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
-"1:	ldrexd	%0, %H0, [%3]\n"
-"	teq	%0, %4\n"
-"	teqeq	%H0, %H4\n"
+"1:	ldrexd	%0, %H0, [%4]\n"
+"	teq	%0, %5\n"
+"	teqeq	%H0, %H5\n"
 "	moveq	%1, #0\n"
 "	beq	2f\n"
-"	adds	%0, %0, %5\n"
-"	adc	%H0, %H0, %H5\n"
-"	strexd	%2, %0, %H0, [%3]\n"
+"	adds	%0, %0, %6\n"
+"	adc	%H0, %H0, %H6\n"
+"	strexd	%2, %0, %H0, [%4]\n"
 "	teq	%2, #0\n"
 "	bne	1b\n"
 "2:"
-	: "=&r" (val), "+r" (ret), "=&r" (tmp)
+	: "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (u), "r" (a)
 	: "cc");
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/