lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sat, 11 Jul 2015 16:36:56 -0400
From:	Waiman Long <Waiman.Long@...com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org,
	Scott J Norton <scott.norton@...com>,
	Douglas Hatch <doug.hatch@...com>,
	Waiman Long <Waiman.Long@...com>
Subject: [PATCH 5/7] locking/pvqspinlock: Add pending bit support

As with the native qspinlock, acquiring a lightly loaded lock through
the pending bit is faster than going through the PV queuing process,
which is even slower than the native queuing process. It also
avoids loading two additional cachelines (the MCS and PV nodes).

This patch adds the pending bit support for PV qspinlock. The
pending bit code has a smaller spin threshold. It will fall back
to the queuing method if it cannot acquire the lock within a certain
time limit.

Signed-off-by: Waiman Long <Waiman.Long@...com>
---
 kernel/locking/qspinlock.c          |   27 +++++++++++++++-
 kernel/locking/qspinlock_paravirt.h |   61 +++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 782bc18..5a25e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -162,6 +162,17 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
 }
 
+/**
+ * clear_pending - clear the pending field with a single WRITE_ONCE() store
+ * @lock: Pointer to queued spinlock structure, accessed as struct __qspinlock
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->pending, 0);	/* writes only the pending member */
+}
+
 /*
  * xchg_tail - Put in the new queue tail code word & retrieve previous one
  * @lock : Pointer to queued spinlock structure
@@ -193,6 +204,15 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 }
 
 /**
+ * clear_pending - clear the pending bit by subtracting _Q_PENDING_VAL
+ * @lock: Pointer to queued spinlock structure; pending is assumed to be set
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	atomic_add(-_Q_PENDING_VAL, &lock->val);	/* atomic add on lock->val */
+}
+
+/**
  * xchg_tail - Put in the new queue tail code word & retrieve previous one
  * @lock : Pointer to queued spinlock structure
  * @tail : The new queue tail code word
@@ -246,6 +266,7 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
 #define pv_enabled()		false
+#define pv_pending_lock(l, v)	false
 
 #define pv_init_node		__pv_init_node
 #define pv_wait_node		__pv_wait_node
@@ -287,8 +308,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
-	if (pv_enabled())
+	if (pv_enabled()) {
+		if (pv_pending_lock(lock, val))
+			return;	/* Got the lock via pending bit */
 		goto queue;
+	}
 
 	if (virt_queued_spin_lock(lock))
 		return;
@@ -464,6 +488,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #undef pv_wait_node
 #undef pv_scan_next
 #undef pv_wait_head
+#undef pv_pending_lock
 
 #undef  queued_spin_lock_slowpath
 #define queued_spin_lock_slowpath	__pv_queued_spin_lock_slowpath
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index efc9a72..d770694 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -40,6 +40,7 @@
 #define QNODE_SPIN_THRESHOLD		SPIN_THRESHOLD
 #define QNODE_SPIN_THRESHOLD_SHORT	(QNODE_SPIN_THRESHOLD >> 4)
 #define QNODE_SPIN_CHECK_MASK		0xff
+#define PENDING_SPIN_THRESHOLD		QNODE_SPIN_THRESHOLD_SHORT
 
 /*
  * Queue node uses: vcpu_running & vcpu_halted.
@@ -70,6 +71,8 @@ enum pv_qlock_stat {
 	pvstat_kick_cpu,
 	pvstat_kick_ahead,
 	pvstat_no_kick,
+	pvstat_pend_lock,
+	pvstat_pend_fail,
 	pvstat_spurious,
 	pvstat_hash,
 	pvstat_hops,
@@ -91,6 +94,8 @@ static const char * const stat_fsnames[pvstat_num] = {
 	[pvstat_kick_cpu]    = "kick_cpu_count",
 	[pvstat_kick_ahead]  = "kick_ahead_count",
 	[pvstat_no_kick]     = "no_kick_count",
+	[pvstat_pend_lock]   = "pending_lock_count",
+	[pvstat_pend_fail]   = "pending_fail_count",
 	[pvstat_spurious]    = "spurious_wakeup",
 	[pvstat_hash]	     = "hash_count",
 	[pvstat_hops]	     = "hash_hops_count",
@@ -355,6 +360,62 @@ static void pv_init_node(struct mcs_spinlock *node)
 }
 
 /*
+ * pv_pending_lock - try to acquire the lock using the pending bit
+ * @lock: Pointer to queued spinlock structure
+ * @val : Lock word value handed in by the slowpath caller
+ *
+ * All wait loops below share one spin budget of PENDING_SPIN_THRESHOLD.
+ * Return: 1 if the lock was acquired, 0 if the caller must queue instead.
+ */
+static int pv_pending_lock(struct qspinlock *lock, u32 val)
+{
+	int loop = PENDING_SPIN_THRESHOLD;
+	u32 new, old;
+
+	/* wait for in-progress pending->locked hand-overs */
+	if (val == _Q_PENDING_VAL) {
+		while (((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) &&
+			loop--)
+			cpu_relax();
+	}
+
+	/* trylock || pending */
+	for (;;) {
+		if (val & ~_Q_LOCKED_MASK)
+			goto queue;
+		new = _Q_LOCKED_VAL;
+		if (val == new)
+			new |= _Q_PENDING_VAL;
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+		if (loop-- <= 0)
+			goto queue;
+		val = old;	/* retry with the value the cmpxchg really saw */
+	}
+
+	if (new == _Q_LOCKED_VAL)
+		goto gotlock;
+	/* We are pending, wait for the owner to go away. */
+	while (((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
+		&& (loop-- > 0))
+		cpu_relax();
+	if (!(val & _Q_LOCKED_MASK)) {
+		clear_pending_set_locked(lock);
+		goto gotlock;
+	}
+	clear_pending(lock);	/* Clear the pending bit only */
+	pvstat_inc(pvstat_pend_fail);
+
+queue:
+	return 0;
+
+gotlock:
+	pvstat_inc(pvstat_pend_lock);
+	return 1;
+}
+
+/*
  * Wait for node->locked to become true, halt the vcpu after a short spin.
  * pv_scan_next() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ