Message-ID: <alpine.DEB.2.00.1012081207250.26943@router.home>
Date:	Wed, 8 Dec 2010 12:08:15 -0600 (CST)
From:	Christoph Lameter <cl@...ux.com>
To:	Tejun Heo <tj@...nel.org>
cc:	akpm@...ux-foundation.org, Pekka Enberg <penberg@...helsinki.fi>,
	linux-kernel@...r.kernel.org,
	Eric Dumazet <eric.dumazet@...il.com>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Subject: Re: [cpuops cmpxchg V1 2/4] x86: this_cpu_cmpxchg and this_cpu_xchg
 operations
Alternate approach: could also use cmpxchg to implement xchg. A quick
user-space sketch of the pattern first, then the patch itself:
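
(Illustration only, not part of the patch: the same xchg-via-cmpxchg
loop in plain user-space C, using the GCC __sync builtin instead of the
kernel's per-cpu API; my_xchg is a made-up name. Note that the builtin
emits a locked cmpxchg, so user space pays for full lock semantics
anyway; the per-cpu cmpxchg used in the patch can omit the lock prefix
because only the local CPU touches the data.)

	/* Exchange *p for newval; returns the previous value. */
	static int my_xchg(int *p, int newval)
	{
		int old;

		do {
			old = *p;
		} while (__sync_val_compare_and_swap(p, old, newval) != old);

		return old;
	}
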
Subject: cpuops: Use cmpxchg for xchg to avoid lock semantics
cmpxchg has a lower cycle count than xchg because xchg carries implied
lock semantics, paying for a full locked operation even though per-cpu
data does not need it. Simulate xchg through cmpxchg for the per-cpu ops.
Signed-off-by: Christoph Lameter <cl@...ux.com>
---
 arch/x86/include/asm/percpu.h |   68 +++++++-----------------------------------
 1 file changed, 12 insertions(+), 56 deletions(-)
Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h	2010-12-08 11:43:50.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h	2010-12-08 12:00:21.000000000 -0600
@@ -212,48 +212,6 @@ do {									\
 	ret__;								\
 })
-/*
- * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
- * full lock semantics even though they are not needed.
- */
-#define percpu_xchg_op(var, nval)					\
-({									\
-	typeof(var) __ret;						\
-	typeof(var) __new = (nval);					\
-	switch (sizeof(var)) {						\
-	case 1:								\
-		asm("xchgb %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "q" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 2:								\
-		asm("xchgw %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 4:								\
-		asm("xchgl %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	case 8:								\
-		asm("xchgq %2, "__percpu_arg(1)			\
-			    : "=a" (__ret), "+m" (var)			\
-			    : "r" (__new)				\
-			    : "memory");				\
-		break;							\
-	default: __bad_percpu_size();					\
-	}								\
-	__ret;								\
-})
-
-/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
- */
 #define percpu_cmpxchg_op(var, oval, nval)				\
 ({									\
 	typeof(var) __ret;						\
@@ -412,16 +370,6 @@ do {									\
 #define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-
 #ifndef CONFIG_M386
 #define __this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
 #define __this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
@@ -489,16 +437,24 @@ do {									\
 #define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
-#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-
 #define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #endif
+#define this_cpu_xchg(pcp, val) \
+({									\
+	typeof(pcp) __o;						\
+	do {								\
+		__o = __this_cpu_read(pcp);				\
+	} while (this_cpu_cmpxchg(pcp, __o, val) != __o);		\
+	__o;								\
+})
+
+#define __this_cpu_xchg this_cpu_xchg
+#define irqsafe_cpu_xchg this_cpu_xchg
+
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)				\
 ({									\
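
Illustration only, not from the patch: usage is unchanged from the
removed asm version. With a hypothetical per-cpu counter:

	DEFINE_PER_CPU(int, my_count);	/* hypothetical example variable */

	/* Read the old value and reset the counter in one step; the
	 * cmpxchg loop makes this safe against interrupts and preemption
	 * on the local CPU (but not against other CPUs). */
	int old = this_cpu_xchg(my_count, 0);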