linux-kernel - [PATCH v6 1/6] locking/qspinlock: relaxes cmpxchg & xchg ops in native code

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <1441996658-62854-2-git-send-email-Waiman.Long@hpe.com>
Date:	Fri, 11 Sep 2015 14:37:33 -0400
From:	Waiman Long <Waiman.Long@....com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org,
	Scott J Norton <scott.norton@...com>,
	Douglas Hatch <doug.hatch@...com>,
	Davidlohr Bueso <dave@...olabs.net>,
	Waiman Long <Waiman.Long@....com>
Subject: [PATCH v6 1/6] locking/qspinlock: relaxes cmpxchg & xchg ops in native code

This patch replaces the cmpxchg() and xchg() calls in the native
qspinlock code with more relaxed versions of those calls to enable
other architectures to adopt queued spinlocks with less performance
overhead.

Signed-off-by: Waiman Long <Waiman.Long@....com>
---
 arch/x86/include/asm/qspinlock.h |    2 +-
 include/asm-generic/qspinlock.h  |    6 +++---
 kernel/locking/qspinlock.c       |   21 +++++++++++++++++----
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae..053e70d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -46,7 +46,7 @@ static inline bool virt_queued_spin_lock(struct qspinlock *lock)
 	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 		return false;
 
-	while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+	while (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) != 0)
 		cpu_relax();
 
 	return true;
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 83bfb87..efbd1fd 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -62,7 +62,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
 	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
 		return 1;
 	return 0;
 }
@@ -77,7 +77,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
 	u32 val;
 
-	val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
 	if (likely(val == 0))
 		return;
 	queued_spin_lock_slowpath(lock, val);
@@ -93,7 +93,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * smp_mb__before_atomic() in order to guarantee release semantics
 	 */
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_sub(_Q_LOCKED_VAL, &lock->val);
 }
 #endif
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c881..28a15c7 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -176,7 +176,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
 	struct __qspinlock *l = (void *)lock;
 
-	return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+	/*
+	 * Use release semantics to make sure that the MCS node is properly
+	 * initialized before changing the tail code.
+	 */
+	return (u32)xchg_release(&l->tail,
+				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }
 
 #else /* _Q_PENDING_BITS == 8 */
@@ -208,7 +213,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
-		old = atomic_cmpxchg(&lock->val, val, new);
+		/*
+		 * Use release semantics to make sure that the MCS node is
+		 * properly initialized before changing the tail code.
+		 */
+		old = atomic_cmpxchg_release(&lock->val, val, new);
 		if (old == val)
 			break;
 
@@ -319,7 +328,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 		if (val == new)
 			new |= _Q_PENDING_VAL;
 
-		old = atomic_cmpxchg(&lock->val, val, new);
+		old = atomic_cmpxchg_acquire(&lock->val, val, new);
 		if (old == val)
 			break;
 
@@ -426,7 +435,11 @@ queue:
 			set_locked(lock);
 			break;
 		}
-		old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+		/*
+		 * The smp_load_acquire() call above has provided the necessary
+		 * acquire semantics required for locking.
+		 */
+		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
 		if (old == val)
 			goto release;	/* No contention */
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/