[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20161026122308.954716671@linuxfoundation.org>
Date: Wed, 26 Oct 2016 14:23:22 +0200
From: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To: linux-kernel@...r.kernel.org
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
stable@...r.kernel.org, Mark Rutland <mark.rutland@....com>,
Will Deacon <will.deacon@....com>
Subject: [PATCH 4.4 099/112] arm64: percpu: rewrite ll/sc loops in assembly
4.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Will Deacon <will.deacon@....com>
commit 1e6e57d9b34a9075d5f9e2048ea7b09756590d11 upstream.
Writing the outer loop of an LL/SC sequence using do {...} while
constructs potentially allows the compiler to hoist memory accesses
between the STXR and the branch back to the LDXR. On CPUs that do not
guarantee forward progress of LL/SC loops when faced with memory
accesses to the same ERG (up to 2k) between the failed STXR and the
branch back, we may end up livelocking.
This patch avoids this issue in our percpu atomics by rewriting the
outer loop as part of the LL/SC inline assembly block.
Fixes: f97fc810798c ("arm64: percpu: Implement this_cpu operations")
Reviewed-by: Mark Rutland <mark.rutland@....com>
Tested-by: Mark Rutland <mark.rutland@....com>
Signed-off-by: Will Deacon <will.deacon@....com>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
---
arch/arm64/include/asm/percpu.h | 120 ++++++++++++++++++----------------------
1 file changed, 56 insertions(+), 64 deletions(-)
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -44,48 +44,44 @@ static inline unsigned long __percpu_##o
\
switch (size) { \
case 1: \
- do { \
- asm ("//__per_cpu_" #op "_1\n" \
- "ldxrb %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_1\n" \
+ "1: ldxrb %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxrb %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u8 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxrb %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u8 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 2: \
- do { \
- asm ("//__per_cpu_" #op "_2\n" \
- "ldxrh %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_2\n" \
+ "1: ldxrh %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxrh %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u16 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxrh %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u16 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 4: \
- do { \
- asm ("//__per_cpu_" #op "_4\n" \
- "ldxr %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_4\n" \
+ "1: ldxr %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxr %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u32 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxr %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u32 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 8: \
- do { \
- asm ("//__per_cpu_" #op "_8\n" \
- "ldxr %[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_8\n" \
+ "1: ldxr %[ret], %[ptr]\n" \
#asm_op " %[ret], %[ret], %[val]\n" \
- "stxr %w[loop], %[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u64 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxr %w[loop], %[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u64 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
default: \
BUILD_BUG(); \
@@ -150,44 +146,40 @@ static inline unsigned long __percpu_xch
switch (size) {
case 1:
- do {
- asm ("//__percpu_xchg_1\n"
- "ldxrb %w[ret], %[ptr]\n"
- "stxrb %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u8 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_1\n"
+ "1: ldxrb %w[ret], %[ptr]\n"
+ " stxrb %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u8 *)ptr)
+ : [val] "r" (val));
break;
case 2:
- do {
- asm ("//__percpu_xchg_2\n"
- "ldxrh %w[ret], %[ptr]\n"
- "stxrh %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u16 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_2\n"
+ "1: ldxrh %w[ret], %[ptr]\n"
+ " stxrh %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u16 *)ptr)
+ : [val] "r" (val));
break;
case 4:
- do {
- asm ("//__percpu_xchg_4\n"
- "ldxr %w[ret], %[ptr]\n"
- "stxr %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u32 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_4\n"
+ "1: ldxr %w[ret], %[ptr]\n"
+ " stxr %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u32 *)ptr)
+ : [val] "r" (val));
break;
case 8:
- do {
- asm ("//__percpu_xchg_8\n"
- "ldxr %[ret], %[ptr]\n"
- "stxr %w[loop], %[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u64 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_8\n"
+ "1: ldxr %[ret], %[ptr]\n"
+ " stxr %w[loop], %[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u64 *)ptr)
+ : [val] "r" (val));
break;
default:
BUILD_BUG();
Powered by blists - more mailing lists