lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1436647018-49734-3-git-send-email-Waiman.Long@hp.com>
Date:	Sat, 11 Jul 2015 16:36:53 -0400
From:	Waiman Long <Waiman.Long@...com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>
Cc:	x86@...nel.org, linux-kernel@...r.kernel.org,
	Scott J Norton <scott.norton@...com>,
	Douglas Hatch <doug.hatch@...com>,
	Waiman Long <Waiman.Long@...com>
Subject: [PATCH 2/7] locking/pvqspinlock: Allow vCPUs kick-ahead

Frequent CPU halting (vmexit) and CPU kicking (vmenter) lengthen
critical sections and block forward progress.  This patch implements
a kick-ahead mechanism where the unlocker will kick the queue head
vCPU as well as up to four additional vCPUs next to the queue head if
they were halted.  The kicks are done after exiting the critical
section to improve parallelism.

The amount of kick-ahead allowed depends on the number of vCPUs in
the VM guest.  This change should improve overall system performance
in a busy overcommitted guest.

Signed-off-by: Waiman Long <Waiman.Long@...com>
---
 kernel/locking/qspinlock_paravirt.h |   71 ++++++++++++++++++++++++++++++++++-
 1 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index d302c39..4c1a299 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -67,6 +67,12 @@ static struct pv_hash_entry *pv_lock_hash;
 static unsigned int pv_lock_hash_bits __read_mostly;
 
 /*
+ * Allow kick-ahead of vCPUs at unlock time
+ */
+#define PV_KICK_AHEAD_MAX	4
+static int pv_kick_ahead __read_mostly;
+
+/*
  * Allocate memory for the PV qspinlock hash buckets
  *
  * This function should be called from the paravirt spinlock initialization
@@ -74,7 +80,16 @@ static unsigned int pv_lock_hash_bits __read_mostly;
  */
 void __init __pv_init_lock_hash(void)
 {
-	int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
+	int ncpus = num_possible_cpus();
+	int pv_hash_size = ALIGN(4 * ncpus, PV_HE_PER_LINE);
+	int i;
+
+	/*
+	 * The minimum number of vCPUs required in each kick-ahead level
+	 */
+	static const u8 kick_ahead_threshold[PV_KICK_AHEAD_MAX] = {
+		4, 8, 16, 32
+	};
 
 	if (pv_hash_size < PV_HE_MIN)
 		pv_hash_size = PV_HE_MIN;
@@ -88,6 +103,18 @@ void __init __pv_init_lock_hash(void)
 					       pv_hash_size, 0, HASH_EARLY,
 					       &pv_lock_hash_bits, NULL,
 					       pv_hash_size, pv_hash_size);
+	/*
+	 * Enable the unlock kick ahead mode according to the number of
+	 * vCPUs available.
+	 */
+	for (i = PV_KICK_AHEAD_MAX; i > 0; i--)
+		if (ncpus >= kick_ahead_threshold[i - 1]) {
+			pv_kick_ahead = i;
+			break;
+		}
+	if (pv_kick_ahead)
+		printk(KERN_INFO "PV unlock kick ahead level %d enabled\n",
+		       pv_kick_ahead);
 }
 
 #define for_each_hash_entry(he, offset, hash)						\
@@ -317,13 +344,33 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 }
 
 /*
+ * Helper to get the address of the next kickable node.
+ * The next node must be in the halted state; this function switches it
+ * to the running state and returns it. Otherwise, NULL is returned.
+ */
+static inline struct pv_node *pv_get_kick_node(struct pv_node *node)
+{
+	struct pv_node *next = (struct pv_node *)READ_ONCE(node->mcs.next);
+
+	if (!next)
+		return NULL;
+
+	if ((READ_ONCE(next->state) != vcpu_halted) ||
+	    (cmpxchg(&next->state, vcpu_halted, vcpu_running) != vcpu_halted))
+		next = NULL;	/* No kicking is needed */
+
+	return next;
+}
+
+/*
  * PV version of the unlock function to be used in stead of
  * queued_spin_unlock().
  */
 __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
-	struct pv_node *node;
+	struct pv_node *node, *nxt, *next[PV_KICK_AHEAD_MAX];
+	int i, nr_kick;
 
 	/*
 	 * We must not unlock if SLOW, because in that case we must first
@@ -340,6 +387,20 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	node = pv_unhash(lock);
 
 	/*
+	 * Implement kick-ahead mode
+	 *
+	 * Access the next group of nodes, if available, and prepare to kick
+	 * them after releasing the lock if they are in the halted state. This
+	 * should improve performance on an overcommitted system.
+	 */
+	for (nr_kick = 0, nxt = node; nr_kick < pv_kick_ahead;
+	     nxt = next[nr_kick], nr_kick++) {
+		next[nr_kick] = pv_get_kick_node(nxt);
+		if (!next[nr_kick])
+			break;
+	}
+
+	/*
 	 * Now that we have a reference to the (likely) blocked pv_node,
 	 * release the lock.
 	 */
@@ -354,6 +415,12 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	 */
 	if (READ_ONCE(node->state) == vcpu_hashed)
 		pv_kick(node->cpu);
+
+	/*
+	 * Kick the next group of vCPUs, if available.
+	 */
+	for (i = 0; i < nr_kick; i++)
+		pv_kick(next[i]->cpu);
 }
 /*
  * Include the architecture specific callee-save thunk of the
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ