http://google-perftools.googlecode.com/svn-history/r48/trunk/src/base/atomicops-internals-x86.cc says " // Opteron Rev E has a bug in which on very rare occasions a locked // instruction doesn't act as a read-acquire barrier if followed by a // non-locked read-modify-write instruction. Rev F has this bug in // pre-release versions, but not in versions released to customers, // so we test only for Rev E, which is family 15, model 32..63 inclusive. if (strcmp(vendor, "AuthenticAMD") == 0 && // AMD family == 15 && 32 <= model && model <= 63) { AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true; } else { AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false; } " There is no official AMD bug ID yet, but it seems to be reported in the field and a fix is present in Solaris code base. The following links shows the current understanding of the issue. http://bugzilla.kernel.org/show_bug.cgi?id=11305 http://bugs.mysql.com/bug.php?id=26081 Since I needed to put the alternative within assembly statements, I decided to rework alternative.h and fold the multiple alternative*() definitions into the same ALTERNATIVE() macro. It is done in the previous patch. Those were duplicated code anyway. AFAIK, this bug has not received any official bug ID from AMD. Solaris already have a workaround for this issue. One should probably audit Xen cmpxchg too. Signed-off-by: Mathieu Desnoyers CC: Andrew Morton CC: mark.langsdorf@amd.com CC: arekm@maven.pl CC: 'Ingo Molnar' CC: H. Peter Anvin CC: Andi Kleen CC: Avi Kivity --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/include/asm/cmpxchg_64.h | 21 ++++++++++++++------- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/futex.h | 2 ++ arch/x86/include/asm/rwsem.h | 1 + arch/x86/include/asm/spinlock.h | 2 ++ arch/x86/kernel/cpu/amd.c | 4 ++++ 7 files changed, 29 insertions(+), 7 deletions(-) Index: linux-2.6-lttng/arch/x86/include/asm/cpufeature.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/cpufeature.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/cpufeature.h 2009-04-18 23:58:28.000000000 -0400 @@ -94,6 +94,7 @@ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ +#define X86_FEATURE_NEED_CMPXCHG_LFENCE (3*32+26) /* Fix AMD cmpxchg lfence */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ Index: linux-2.6-lttng/arch/x86/kernel/cpu/amd.c =================================================================== --- linux-2.6-lttng.orig/arch/x86/kernel/cpu/amd.c 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/kernel/cpu/amd.c 2009-04-18 23:58:28.000000000 -0400 @@ -416,6 +416,10 @@ static void __cpuinit init_amd(struct cp clear_cpu_cap(c, X86_FEATURE_MCE); #endif + /* Enable workaround for missing AMD cmpxchg lfence */ + if (c->x86 == 0xf && c->x86_model >= 32 && c->x86_model <= 63) + set_cpu_cap(c, X86_FEATURE_NEED_CMPXCHG_LFENCE); + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); Index: linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/cmpxchg_64.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h 2009-04-18 23:58:28.000000000 -0400 @@ -66,25 +66,29 @@ static inline unsigned long __cmpxchg(vo unsigned long prev; switch (size) { case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" + asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "q"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" + asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" + asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" + asm volatile(LOCK_PREFIX "cmpxchgq %1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); @@ -105,19 +109,22 @@ static inline unsigned long __sync_cmpxc unsigned long prev; switch (size) { case 1: - asm volatile("lock; cmpxchgb %b1,%2" + asm volatile("lock; cmpxchgb %b1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "q"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 2: - asm volatile("lock; cmpxchgw %w1,%2" + asm volatile("lock; cmpxchgw %w1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); return prev; case 4: - asm volatile("lock; cmpxchgl %1,%2" + asm volatile("lock; cmpxchgl %1,%2\n\t" + CMPXCHG_LFENCE : "=a"(prev) : "r"(new), "m"(*__xg(ptr)), "0"(old) : "memory"); Index: linux-2.6-lttng/arch/x86/include/asm/alternative.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/alternative.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/alternative.h 2009-04-18 23:58:28.000000000 -0400 @@ -5,6 +5,7 @@ #include #include #include +#include /* * Alternative inline assembly for SMP. @@ -54,6 +55,10 @@ struct alt_instr { #endif }; +/* Needed both in UP and SMP build because of cmpxchg_sync */ +#define CMPXCHG_LFENCE ALTERNATIVE(ASM_NOP3, "lfence", \ + X86_FEATURE_NEED_CMPXCHG_LFENCE) + extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); Index: linux-2.6-lttng/arch/x86/include/asm/spinlock.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/spinlock.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/spinlock.h 2009-04-18 23:58:28.000000000 -0400 @@ -86,6 +86,7 @@ static __always_inline int __ticket_spin "leal 0x100(%" REG_PTR_MODE "0), %1\n\t" "jne 1f\n\t" LOCK_PREFIX "cmpxchgw %w1,%2\n\t" + CMPXCHG_LFENCE "\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" @@ -139,6 +140,7 @@ static __always_inline int __ticket_spin "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t" "jne 1f\n\t" LOCK_PREFIX "cmpxchgl %1,%2\n\t" + CMPXCHG_LFENCE "\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" Index: linux-2.6-lttng/arch/x86/include/asm/rwsem.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/rwsem.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/rwsem.h 2009-04-18 23:58:28.000000000 -0400 @@ -129,6 +129,7 @@ static inline int __down_read_trylock(st " addl %3,%2\n\t" " jle 2f\n\t" LOCK_PREFIX " cmpxchgl %2,%0\n\t" + CMPXCHG_LFENCE "\n\t" " jnz 1b\n\t" "2:\n\t" "# ending __down_read_trylock\n\t" Index: linux-2.6-lttng/arch/x86/include/asm/futex.h =================================================================== --- linux-2.6-lttng.orig/arch/x86/include/asm/futex.h 2009-04-18 23:58:23.000000000 -0400 +++ linux-2.6-lttng/arch/x86/include/asm/futex.h 2009-04-18 23:58:28.000000000 -0400 @@ -26,6 +26,7 @@ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + CMPXCHG_LFENCE "\n" \ "\tjnz\t1b\n" \ "3:\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ @@ -123,6 +124,7 @@ static inline int futex_atomic_cmpxchg_i return -EFAULT; asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + CMPXCHG_LFENCE "\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %2, %0\n" "\tjmp 2b\n" -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/