lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 13 Oct 2015 10:04:23 -0700
From:	Davidlohr Bueso <dave@...olabs.net>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...nel.org>,
	Thomas Gleixner <tglx@...utronix.de>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Will Deacon <will.deacon@....com>,
	"Paul E.McKenney" <paulmck@...ux.vnet.ibm.com>,
	linux-kernel@...r.kernel.org, Waiman Long <Waiman.Long@....com>,
	Davidlohr Bueso <dave@...olabs.net>
Subject: [PATCH 6/5] locking/qspinlock: Use acquire/release semantics

As of 654672d4ba1 (locking/atomics: Add _{acquire|release|relaxed}()
variants of some atomic operations) and 6d79ef2d30e (locking, asm-generic:
Add _{relaxed|acquire|release}() variants for 'atomic_long_t'), weakly
ordered archs can benefit from more relaxed use of barriers when locking
and unlocking, instead of regular full barrier semantics. While currently
only arm64 supports such optimizations, updating the corresponding locking
primitives allows other archs to benefit immediately as well, once the
necessary machinery is implemented, of course.

Signed-off-by: Davidlohr Bueso <dbueso@...e.de>
---

Hi Waiman, it seems you never sent an updated patch regarding $TOPIC[1].
So I rebased, rewrote most of the comments and eliminated the x86
changes. What do you think? I'd like to get everything updated in
kernel/locking/ by 4.4, and qspinlock is the only primitive left.

x86 compile tested only.

Thanks!

[1] https://lkml.org/lkml/2015/9/11/540

  include/asm-generic/qspinlock.h | 10 ++++------
  kernel/locking/qspinlock.c      | 25 +++++++++++++++++++++----
  2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index e2aadbc..799f960 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -62,7 +62,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
  {
	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
		return 1;
	return 0;
  }
@@ -77,7 +77,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
  {
	u32 val;

-	val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
	if (likely(val == 0))
		return;
	queued_spin_lock_slowpath(lock, val);
@@ -90,10 +90,8 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
   */
  static __always_inline void queued_spin_unlock(struct qspinlock *lock)
  {
-	/*
-	 * smp_mb__before_atomic() in order to guarantee release semantics
-	 */
-	smp_mb__before_atomic_dec();
+	/* at minimum, guarantee RELEASE semantics */
+	smp_mb__before_atomic();
	atomic_sub(_Q_LOCKED_VAL, &lock->val);
  }
  #endif
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 87e9ce6a..6355d8a 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -176,7 +176,13 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
  {
	struct __qspinlock *l = (void *)lock;

-	return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+	/*
+	 * Use RELEASE semantics to ensure that nothing is
+	 * re-ordered out before we are done initializing the
+	 * new mcs node. Once the new tail is set, all is fair.
+	 */
+	return (u32)xchg_release(&l->tail,
+				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
  }

  #else /* _Q_PENDING_BITS == 8 */
@@ -208,7 +214,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)

	for (;;) {
		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
-		old = atomic_cmpxchg(&lock->val, val, new);
+		/*
+		 * Use RELEASE semantics to ensure that nothing is
+		 * re-ordered out before we are done initializing the
+		 * new mcs node. Once the new tail is set, all is fair.
+		 */
+		old = atomic_cmpxchg_release(&lock->val, val, new);
		if (old == val)
			break;

@@ -319,7 +330,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
		if (val == new)
			new |= _Q_PENDING_VAL;

-		old = atomic_cmpxchg(&lock->val, val, new);
+		old = atomic_cmpxchg_acquire(&lock->val, val, new);
		if (old == val)
			break;

@@ -426,7 +437,13 @@ queue:
			set_locked(lock);
			break;
		}
-		old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+
+		/*
+		 * The above smp_load_acquire() provides us with the necessary
+		 * ACQUIRE semantics required for locking. We can, therefore,
+		 * fully relax the barriers in this case.
+		 */
+		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
		if (old == val)
			goto release;	/* No contention */

--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ