Message-ID: <alpine.DEB.2.00.1012081207250.26943@router.home>
Date:	Wed, 8 Dec 2010 12:08:15 -0600 (CST)
From:	Christoph Lameter <cl@...ux.com>
To:	Tejun Heo <tj@...nel.org>
cc:	akpm@...ux-foundation.org, Pekka Enberg <penberg@...helsinki.fi>,
	linux-kernel@...r.kernel.org,
	Eric Dumazet <eric.dumazet@...il.com>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Subject: Re: [cpuops cmpxchg V1 2/4] x86: this_cpu_cmpxchg and this_cpu_xchg
 operations

Alternate approach: we could also use cmpxchg to implement xchg.
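
For illustration only (not part of the patch below): xchg can be
emulated with a cmpxchg retry loop along these lines.  This user-space
sketch uses GCC's __sync_val_compare_and_swap builtin, which on x86
emits a locked cmpxchg; the per-cpu variant in the patch can drop the
lock prefix entirely because the variable is only accessed from the
local cpu.

	/* Hypothetical sketch, not kernel code: emulate xchg with CAS. */
	static inline int cas_xchg(int *ptr, int new)
	{
		int old;

		do {
			old = *ptr;	/* snapshot the current value */
		} while (__sync_val_compare_and_swap(ptr, old, new) != old);

		return old;	/* the value that was replaced */
	}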


Subject: cpuops: Use cmpxchg for xchg to avoid lock semantics

Cmpxchg has a lower cycle count than xchg, because xchg carries implied
lock semantics even where they are not needed; cmpxchg without the lock
prefix avoids that cost.

Simulate xchg via cmpxchg for the per-cpu ops.

Signed-off-by: Christoph Lameter <cl@...ux.com>

---
 arch/x86/include/asm/percpu.h |   68 +++++++-----------------------------------
 1 file changed, 12 insertions(+), 56 deletions(-)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h	2010-12-08 11:43:50.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h	2010-12-08 12:00:21.000000000 -0600
@@ -212,48 +212,6 @@ do {									\
 	ret__;								\
 })

-/*
- * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
- * full lock semantics even though they are not needed.
- */
-#define percpu_xchg_op(var, nval)					\
-({									\
-	typeof(var) __ret;						\
-	typeof(var) __new = (nval);					\
-	switch (sizeof(var)) {						\
-	case 1:								\
-		asm("xchgb %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "q" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 2:								\
-		asm("xchgw %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 4:								\
-		asm("xchgl %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 8:								\
-		asm("xchgq %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	default: __bad_percpu_size();					\
-	}								\
-	__ret;								\
-})
-
-/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
- */
 #define percpu_cmpxchg_op(var, oval, nval)				\
 ({									\
 	typeof(var) __ret;						\
@@ -412,16 +370,6 @@ do {									\
 #define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)

-#define __this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-
 #ifndef CONFIG_M386
 #define __this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
 #define __this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
@@ -489,16 +437,24 @@ do {									\
 #define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)

-#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-
 #define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

 #endif

+#define this_cpu_xchg(pcp, val) \
+({									\
+	typeof(val) __o;						\
+	do {								\
+		__o = __this_cpu_read(pcp);				\
+	} while (this_cpu_cmpxchg(pcp, __o, val) != __o);		\
+	__o;								\
+})
+
+#define __this_cpu_xchg this_cpu_xchg
+#define irqsafe_cpu_xchg this_cpu_xchg
+
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)				\
 ({									\

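A note on the replacement loop above: __this_cpu_read() takes a
snapshot and this_cpu_cmpxchg() only publishes the new value if that
snapshot is still current, retrying otherwise, so callers keep plain
xchg semantics without the implied lock prefix.  A hypothetical caller
might look like this (event_count is an illustrative per-cpu variable,
not an existing kernel symbol):

	/* Sketch only: atomically take the local count and reset it. */
	DEFINE_PER_CPU(int, event_count);

	static void drain_local_events(void)
	{
		int old = this_cpu_xchg(event_count, 0);

		if (old)
			pr_info("drained %d local events\n", old);
	}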