From 5d7941a498935fb225b2c7a3108cbf590114c3db Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 7 Jul 2020 22:29:16 -0400 Subject: [PATCH 2/9] locking/pvqspinlock: Introduce CONFIG_PARAVIRT_QSPINLOCKS_LITE Add a new PARAVIRT_QSPINLOCKS_LITE config option that allows architectures to use the PV qspinlock code without the need to use or implement a pv_kick() function, thus eliminating the atomic unlock overhead. The non-atomic queued_spin_unlock() can be used instead. The pv_wait() function will still be needed, but it can be a dummy function. With that option set, the hybrid PV queued/unfair locking code should still be able to keep the lock performant enough in a paravirtualized environment. Signed-off-by: Waiman Long --- kernel/Kconfig.locks | 4 +++ kernel/locking/lock_events_list.h | 3 ++ kernel/locking/qspinlock_paravirt.h | 49 ++++++++++++++++++++++++----- kernel/locking/qspinlock_stat.h | 5 +-- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 3de8fd11873b..1824ba8c44a9 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -243,6 +243,10 @@ config QUEUED_SPINLOCKS def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP +config PARAVIRT_QSPINLOCKS_LITE + bool + depends on QUEUED_SPINLOCKS && PARAVIRT_SPINLOCKS + config BPF_ARCH_SPINLOCK bool diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h index 239039d0ce21..9ae07a7148e8 100644 --- a/kernel/locking/lock_events_list.h +++ b/kernel/locking/lock_events_list.h @@ -22,11 +22,14 @@ /* * Locking events for PV qspinlock. 
 */ +#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */ LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */ LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */ LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */ LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */ +#endif + LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */ LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */ LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 8eec58320b85..2d24563aa9b9 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -77,6 +77,23 @@ struct pv_node { * This hybrid PV queued/unfair lock combines the best attributes of a * queued lock (no lock starvation) and an unfair lock (good performance * on not heavily contended locks). + * + * PV lock lite + * ------------ + * + * By default, the PV lock uses two hypervisor-specific functions pv_wait() + * and pv_kick() to release the vcpu back to the hypervisor and request the + * hypervisor to put the given vcpu online again, respectively. + * + * The pv_kick() function is called at unlock time and requires the use of + * an atomic instruction to prevent a missed wakeup. The unlock overhead of + * the PV lock is a major reason why the PV lock is slightly slower than + * the native lock. Not all hypervisors really need to use both + * pv_wait() and pv_kick(). The PARAVIRT_QSPINLOCKS_LITE config option + * enables a lighter version of PV lock that relies mainly on the hybrid + * queued/unfair lock. The pv_wait() function will be used if provided. + * The pv_kick() function is not used, which eliminates the unlock overhead + * and allows the non-atomic queued_spin_unlock() to be used. 
*/ #define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l) static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) @@ -153,6 +170,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock) } #endif /* _Q_PENDING_BITS == 8 */ +#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE /* * Lock and MCS node addresses hash table for fast lookup * @@ -410,6 +428,29 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) } #endif /* __pv_queued_spin_unlock */ +static inline void set_pv_node_running(struct pv_node *pn) +{ + /* + * If pv_kick_node() changed us to vcpu_hashed, retain that value so + * that pv_wait_head_or_lock() will not try to hash this lock. + */ + cmpxchg(&pn->state, vcpu_halted, vcpu_running); +} +#else +static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node) +{ + return false; +} + +static inline void pv_kick_node(struct qspinlock *lock, + struct mcs_spinlock *node) { } + +static inline void set_pv_node_running(struct pv_node *pn) +{ + pn->state = vcpu_running; +} +#endif /* CONFIG_PARAVIRT_QSPINLOCKS_LITE */ + /* * Return true if when it is time to check the previous node which is not * in a running state. @@ -475,13 +516,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) lockevent_cond_inc(pv_wait_early, wait_early); pv_wait(&pn->state, vcpu_halted); } - - /* - * If pv_kick_node() changed us to vcpu_hashed, retain that - * value so that pv_wait_head_or_lock() knows to not also try - * to hash this lock. 
- */ - cmpxchg(&pn->state, vcpu_halted, vcpu_running); + set_pv_node_running(pn); /* * If the locked flag is still not set after wakeup, it is a diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e625bb410aa2..e9f63240785b 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -7,7 +7,8 @@ #include "lock_events.h" #ifdef CONFIG_LOCK_EVENT_COUNTS -#ifdef CONFIG_PARAVIRT_SPINLOCKS +#if defined(CONFIG_PARAVIRT_SPINLOCKS) && \ + !defined(CONFIG_PARAVIRT_QSPINLOCKS_LITE) /* * Collect pvqspinlock locking event counts */ @@ -133,7 +134,7 @@ static inline void __pv_wait(u8 *ptr, u8 val) #define pv_kick(c) __pv_kick(c) #define pv_wait(p, v) __pv_wait(p, v) -#endif /* CONFIG_PARAVIRT_SPINLOCKS */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS && !CONFIG_PARAVIRT_QSPINLOCKS_LITE */ #else /* CONFIG_LOCK_EVENT_COUNTS */ -- 2.18.1