lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Wed, 18 Jan 2012 14:24:11 +0000 From: "Jan Beulich" <JBeulich@...e.com> To: <mingo@...e.hu>, <tglx@...utronix.de>, <hpa@...or.com> Cc: "eric.dumazet@...il.com" <eric.dumazet@...il.com>, <luca@...a-barbieri.com>, <linux-kernel@...r.kernel.org> Subject: [PATCH 2/2] ix86: atomic64 assembly improvements The cmpxchg8b variants of "set" and "xchg" are really identical, and hence don't need to be repeated: %ebx and %ecx don't need to be copied into %eax and %edx respectively (this is only necessary when desiring to only read the stored value), and the LOCK prefix should also be used in "set" (contrary to what the comment that is now being removed was saying, there is - to my knowledge - no *architectural* guarantee that aligned 64-bit writes would always be carried out atomically). In the "add_unless" implementation, swapping the use of %ecx and %esi for passing arguments allows %esi to become an input only (i.e. permitting the register to be re-used to address the same object without reload). In "{add,sub}_return", doing the initial read64 through the passed in %ecx decreases a register dependency. In "inc_not_zero", a branch can be eliminated by or-ing together the two halves of the current (64-bit) value, and code size can be further reduced by adjusting the arithmetic slightly. 
Signed-off-by: Jan Beulich <jbeulich@...e.com> Cc: Luca Barbieri <luca@...a-barbieri.com> Cc: Eric Dumazet <eric.dumazet@...il.com> --- arch/x86/include/asm/atomic64_32.h | 13 +++++------ arch/x86/lib/atomic64_386_32.S | 6 ++--- arch/x86/lib/atomic64_cx8_32.S | 42 +++++++++---------------------------- 3 files changed, 20 insertions(+), 41 deletions(-) --- tip-i386-atomic64.orig/arch/x86/include/asm/atomic64_32.h +++ tip-i386-atomic64/arch/x86/include/asm/atomic64_32.h @@ -36,6 +36,7 @@ typedef struct { #define ATOMIC64_EXPORT(sym) __ATOMIC64_EXPORT(sym##_cx8); \ __ATOMIC64_EXPORT(sym##_386) +__ATOMIC64_EXPORT(set_386); __ATOMIC64_EXPORT(add_386); __ATOMIC64_EXPORT(sub_386); __ATOMIC64_EXPORT(inc_386); @@ -46,7 +47,6 @@ __ATOMIC64_EXPORT(dec_386); __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) ATOMIC64_EXPORT(read); -ATOMIC64_EXPORT(set); ATOMIC64_EXPORT(xchg); ATOMIC64_EXPORT(add_return); ATOMIC64_EXPORT(sub_return); @@ -104,9 +104,9 @@ static inline void atomic64_set(atomic64 { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - alternative_atomic64(set, /* no output */, - "S" (v), "b" (low), "c" (high) - : "eax", "edx", "memory"); + __alternative_atomic64(set, xchg, /* no output */, + "S" (v), "b" (low), "c" (high) + : "eax", "edx", "memory"); } /** @@ -286,9 +286,8 @@ static inline int atomic64_add_unless(at unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); alternative_atomic64(add_unless, - ASM_OUTPUT2("+A" (a), "+c" (v), - "+S" (low), "+D" (high)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), + "S" (v) : "memory"); return (int)a; } --- tip-i386-atomic64.orig/arch/x86/lib/atomic64_386_32.S +++ tip-i386-atomic64/arch/x86/lib/atomic64_386_32.S @@ -137,13 +137,13 @@ BEGIN(dec_return) RET_ENDP #undef v -#define v %ecx +#define v %esi BEGIN(add_unless) - addl %eax, %esi + addl %eax, %ecx adcl %edx, %edi addl (v), %eax adcl 4(v), %edx - cmpl %eax, %esi + cmpl %eax, %ecx je 3f 1: movl %eax, 
(v) --- tip-i386-atomic64.orig/arch/x86/lib/atomic64_cx8_32.S +++ tip-i386-atomic64/arch/x86/lib/atomic64_cx8_32.S @@ -39,24 +39,9 @@ ENTRY(atomic64_read_cx8) CFI_ENDPROC ENDPROC(atomic64_read_cx8) -ENTRY(atomic64_set_cx8) - CFI_STARTPROC - -1: -/* we don't need LOCK_PREFIX since aligned 64-bit writes - * are atomic on 586 and newer */ - cmpxchg8b (%esi) - jne 1b - - ret - CFI_ENDPROC -ENDPROC(atomic64_set_cx8) - ENTRY(atomic64_xchg_cx8) CFI_STARTPROC - movl %ebx, %eax - movl %ecx, %edx 1: LOCK_PREFIX cmpxchg8b (%esi) @@ -78,7 +63,7 @@ ENTRY(atomic64_\func\()_return_cx8) movl %edx, %edi movl %ecx, %ebp - read64 %ebp + read64 %ecx 1: movl %eax, %ebx movl %edx, %ecx @@ -159,23 +144,22 @@ ENTRY(atomic64_add_unless_cx8) SAVE ebx /* these just push these two parameters on the stack */ SAVE edi - SAVE esi + SAVE ecx - movl %ecx, %ebp - movl %eax, %esi + movl %eax, %ebp movl %edx, %edi - read64 %ebp + read64 %esi 1: cmpl %eax, 0(%esp) je 4f 2: movl %eax, %ebx movl %edx, %ecx - addl %esi, %ebx + addl %ebp, %ebx adcl %edi, %ecx LOCK_PREFIX - cmpxchg8b (%ebp) + cmpxchg8b (%esi) jne 1b movl $1, %eax @@ -199,13 +183,13 @@ ENTRY(atomic64_inc_not_zero_cx8) read64 %esi 1: - testl %eax, %eax - je 4f -2: + movl %eax, %ecx + orl %edx, %ecx + jz 3f movl %eax, %ebx - movl %edx, %ecx + xorl %ecx, %ecx addl $1, %ebx - adcl $0, %ecx + adcl %edx, %ecx LOCK_PREFIX cmpxchg8b (%esi) jne 1b @@ -214,9 +198,5 @@ ENTRY(atomic64_inc_not_zero_cx8) 3: RESTORE ebx ret -4: - testl %edx, %edx - jne 2b - jmp 3b CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists