lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 31 Jul 2015 22:22:02 -0400
From:	Waiman Long <Waiman.Long@...com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org,
	Scott J Norton <scott.norton@...com>,
	Douglas Hatch <doug.hatch@...com>,
	Davidlohr Bueso <dave@...olabs.net>,
	Waiman Long <Waiman.Long@...com>
Subject: [PATCH v4 5/7] locking/pvqspinlock: Enable deferment of vCPU kicking to unlock call

Most of the vCPU kickings are done on the locking side where the new
lock holder wakes up the queue head vCPU to spin on the lock. However,
there are situations where it may be advantageous to defer the vCPU
kicking to when the lock holder releases the lock.

This patch enables the deferment of vCPU kicking to the unlock function
by adding a new vCPU state (vcpu_hashed) to mark the fact that
 1) _Q_SLOW_VAL is set in the lock, and
 2) the pv_node address is stored in the hash table

This enablement patch, by itself, should not change the performance
of the pvqspinlock code. Actual deferment of vCPU kicks will be added
in a later patch.

Signed-off-by: Waiman Long <Waiman.Long@...com>
---
 kernel/locking/qspinlock.c          |    6 +++---
 kernel/locking/qspinlock_paravirt.h |   34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 6518ee9..94fdd27 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -259,8 +259,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -464,7 +464,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 5efcc65..5e140fe 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -33,6 +33,7 @@
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed,	/* vcpu_halted + node stored in hash table */
 };
 
 struct pv_node {
@@ -406,13 +407,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 		}
 
+		if (READ_ONCE(node->locked))
+			break;
+
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * Reset the vCPU state to running to avoid unnecessary CPU
+		 * kicking unless vcpu_hashed had already been set. In this
+		 * case, node->locked should have just been set, and we
+		 * aren't going to set state to vcpu_halted again.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
-		if (READ_ONCE(node->locked))
-			break;
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
 		 * spurious wakeup and the vCPU should wait again. However,
@@ -431,12 +436,16 @@ static void pv_wait_node(struct mcs_spinlock *node)
 
 /*
  * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * pv_wait_node(). Alternatively, it can also defer the kicking to the
+ * unlock function.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 
+	if (xchg(&pn->state, vcpu_running) != vcpu_halted)
+		return;
+
 	/*
 	 * Note that because node->locked is already set, this actual
 	 * mcs_spinlock entry could be re-used already.
@@ -446,10 +455,8 @@ static void pv_kick_node(struct mcs_spinlock *node)
 	 *
 	 * See the comment in pv_wait_node().
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted) {
-		pvstat_inc(pvstat_lock_kick);
-		pv_kick(pn->cpu);
-	}
+	pvstat_inc(pvstat_lock_kick);
+	pv_kick(pn->cpu);
 }
 
 /*
@@ -471,6 +478,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
+		if (!lp && (xchg(&pn->state, vcpu_hashed) == vcpu_hashed))
+			/*
+			 * The hash table & _Q_SLOW_VAL had been filled
+			 * by the lock holder.
+			 */
+			lp = (struct qspinlock **)-1;
+
 		if (!lp) { /* ONCE */
 			lp = pv_hash(lock, pn);
 			/*
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ