From 5d7941a498935fb225b2c7a3108cbf590114c3db Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 7 Jul 2020 22:29:16 -0400
Subject: [PATCH 2/9] locking/pvqspinlock: Introduce
 CONFIG_PARAVIRT_QSPINLOCKS_LITE

Add a new PARAVIRT_QSPINLOCKS_LITE config option that allows
architectures to use the PV qspinlock code without having to implement
a pv_kick() function, thus eliminating the atomic unlock overhead:
the non-atomic queued_spin_unlock() can be used instead. The pv_wait()
function is still required, but it can be a dummy function.
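
For context, the non-atomic unlock referred to above is the generic
queued_spin_unlock() from include/asm-generic/qspinlock.h, which is
essentially just a store-release (shown here slightly abridged):

	static __always_inline void queued_spin_unlock(struct qspinlock *lock)
	{
		/* unlock() needs release semantics: */
		smp_store_release(&lock->locked, 0);
	}

This avoids the cmpxchg()-based unlock that __pv_queued_spin_unlock()
has to use so that a waiter needing a pv_kick() is not missed.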

With that option set, the hybrid PV queued/unfair locking code should
still deliver adequate performance in a paravirtualized environment.
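
As an illustration (names and path below are hypothetical, not part of
this patch), an architecture opting into the lite variant could select
PARAVIRT_QSPINLOCKS_LITE from its Kconfig and provide a dummy pv_wait()
along the lines of the sketch below; no pv_kick() implementation would
be needed at all:

	/* arch/foo/include/asm/qspinlock_paravirt.h -- hypothetical sketch */
	#include <linux/types.h>

	static inline void pv_wait(u8 *ptr, u8 val)
	{
		/*
		 * Dummy wait: return immediately instead of asking the
		 * hypervisor to halt this vCPU. The hybrid queued/unfair
		 * lock is relied upon for performance instead.
		 */
	}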

Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/Kconfig.locks                |  4 +++
 kernel/locking/lock_events_list.h   |  3 ++
 kernel/locking/qspinlock_paravirt.h | 49 ++++++++++++++++++++++++-----
 kernel/locking/qspinlock_stat.h     |  5 +--
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 3de8fd11873b..1824ba8c44a9 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -243,6 +243,10 @@ config QUEUED_SPINLOCKS
 	def_bool y if ARCH_USE_QUEUED_SPINLOCKS
 	depends on SMP
 
+config PARAVIRT_QSPINLOCKS_LITE
+	bool
+	depends on QUEUED_SPINLOCKS && PARAVIRT_SPINLOCKS
+
 config BPF_ARCH_SPINLOCK
 	bool
 
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 239039d0ce21..9ae07a7148e8 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -22,11 +22,14 @@
 /*
  * Locking events for PV qspinlock.
  */
+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
 LOCK_EVENT(pv_hash_hops)	/* Average # of hops per hashing operation */
 LOCK_EVENT(pv_kick_unlock)	/* # of vCPU kicks issued at unlock time   */
 LOCK_EVENT(pv_kick_wake)	/* # of vCPU kicks for pv_latency_wake	   */
 LOCK_EVENT(pv_latency_kick)	/* Average latency (ns) of vCPU kick	   */
 LOCK_EVENT(pv_latency_wake)	/* Average latency (ns) of kick-to-wakeup  */
+#endif
+
 LOCK_EVENT(pv_lock_stealing)	/* # of lock stealing operations	   */
 LOCK_EVENT(pv_spurious_wakeup)	/* # of spurious wakeups in non-head vCPUs */
 LOCK_EVENT(pv_wait_again)	/* # of wait's after queue head vCPU kick  */
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8eec58320b85..2d24563aa9b9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -77,6 +77,23 @@ struct pv_node {
  * This hybrid PV queued/unfair lock combines the best attributes of a
  * queued lock (no lock starvation) and an unfair lock (good performance
  * on not heavily contended locks).
+ *
+ * PV lock lite
+ * ------------
+ *
+ * By default, the PV lock uses two hypervisor-specific functions, pv_wait()
+ * and pv_kick(), to release the vcpu back to the hypervisor and to ask the
+ * hypervisor to put the given vcpu online again, respectively.
+ *
+ * The pv_kick() function is called at unlock time and requires an atomic
+ * instruction to avoid missing a wakeup. This unlock overhead is a major
+ * reason why the PV lock is slightly slower than the native lock. Not all
+ * hypervisors really need to use both pv_wait() and pv_kick(). The
+ * PARAVIRT_QSPINLOCKS_LITE config option enables a lighter version of the
+ * PV lock that relies mainly on the hybrid queued/unfair lock. The
+ * pv_wait() function will be used if provided. The pv_kick() function is
+ * not used, which eliminates the unlock overhead and allows the non-atomic
+ * queued_spin_unlock() to be used.
  */
 #define queued_spin_trylock(l)	pv_hybrid_queued_unfair_trylock(l)
 static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
@@ -153,6 +170,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
 /*
  * Lock and MCS node addresses hash table for fast lookup
  *
@@ -410,6 +428,29 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 }
 #endif /* __pv_queued_spin_unlock */
 
+static inline void set_pv_node_running(struct pv_node *pn)
+{
+	/*
+	 * If pv_kick_node() changed us to vcpu_hashed, retain that value so
+	 * that pv_wait_head_or_lock() will not try to hash this lock.
+	 */
+	cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+}
+#else
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+	return false;
+}
+
+static inline void pv_kick_node(struct qspinlock *lock,
+				struct mcs_spinlock *node) { }
+
+static inline void set_pv_node_running(struct pv_node *pn)
+{
+	pn->state = vcpu_running;
+}
+#endif /* CONFIG_PARAVIRT_QSPINLOCKS_LITE */
+
 /*
  * Return true if when it is time to check the previous node which is not
  * in a running state.
@@ -475,13 +516,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 			lockevent_cond_inc(pv_wait_early, wait_early);
 			pv_wait(&pn->state, vcpu_halted);
 		}
-
-		/*
-		 * If pv_kick_node() changed us to vcpu_hashed, retain that
-		 * value so that pv_wait_head_or_lock() knows to not also try
-		 * to hash this lock.
-		 */
-		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+		set_pv_node_running(pn);
 
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index e625bb410aa2..e9f63240785b 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -7,7 +7,8 @@
 #include "lock_events.h"
 
 #ifdef CONFIG_LOCK_EVENT_COUNTS
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && \
+    !defined(CONFIG_PARAVIRT_QSPINLOCKS_LITE)
 /*
  * Collect pvqspinlock locking event counts
  */
@@ -133,7 +134,7 @@ static inline void __pv_wait(u8 *ptr, u8 val)
 #define pv_kick(c)	__pv_kick(c)
 #define pv_wait(p, v)	__pv_wait(p, v)
 
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS && !CONFIG_PARAVIRT_QSPINLOCKS_LITE */
 
 #else /* CONFIG_LOCK_EVENT_COUNTS */
 
-- 
2.18.1