[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230320180745.658574087@redhat.com>
Date: Mon, 20 Mar 2023 15:03:37 -0300
From: Marcelo Tosatti <mtosatti@...hat.com>
To: Christoph Lameter <cl@...ux.com>
Cc: Aaron Tomlin <atomlin@...mlin.com>,
Frederic Weisbecker <frederic@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
Russell King <linux@...linux.org.uk>,
Huacai Chen <chenhuacai@...nel.org>,
Heiko Carstens <hca@...ux.ibm.com>, x86@...nel.org,
Vlastimil Babka <vbabka@...e.cz>,
Michal Hocko <mhocko@...e.com>,
Marcelo Tosatti <mtosatti@...hat.com>
Subject: [PATCH v7 05/13] this_cpu_cmpxchg: x86: switch this_cpu_cmpxchg to locked, add _local function
The goal is to have vmstat_shepherd transfer per-CPU counters
to the global counters remotely. For this,
an atomic this_cpu_cmpxchg is necessary.
Following the kernel convention for cmpxchg/cmpxchg_local,
change x86's this_cpu_cmpxchg_ helpers to be atomic (LOCK-prefixed),
and add this_cpu_cmpxchg_local_ helpers, which are not atomic.
Signed-off-by: Marcelo Tosatti <mtosatti@...hat.com>
---
Index: linux-vmstat-remote/arch/x86/include/asm/percpu.h
===================================================================
--- linux-vmstat-remote.orig/arch/x86/include/asm/percpu.h
+++ linux-vmstat-remote/arch/x86/include/asm/percpu.h
@@ -197,11 +197,11 @@ do { \
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
-#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval, lockp) \
({ \
__pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
- asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
+ asm qual (__pcpu_op2_##size(lockp "cmpxchg", "%[nval]", \
__percpu_arg([var])) \
: [oval] "+a" (pco_old__), \
[var] "+m" (_var) \
@@ -279,16 +279,20 @@ do { \
#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval, "")
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval, "")
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval, "")
#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_local_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_local_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_local_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval, "")
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval, LOCK_PREFIX)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval, LOCK_PREFIX)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval, LOCK_PREFIX)
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -319,16 +323,17 @@ do { \
#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval, "")
-#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_cmpxchg_local_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval, "")
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval, LOCK_PREFIX)
/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction
Powered by blists - more mailing lists