Message-ID: <1471567197.4991.41.camel@j-VirtualBox>
Date:   Thu, 18 Aug 2016 17:39:57 -0700
From:   Jason Low <jason.low2@....com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Ding Tianhong <dingtianhong@...wei.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Will Deacon <Will.Deacon@....com>,
        Jason Low <jason.low2@....com>, Ingo Molnar <mingo@...hat.com>,
        imre.deak@...el.com, linux-kernel@...r.kernel.org,
        Waiman Long <Waiman.Long@....com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Tim Chen <tim.c.chen@...ux.intel.com>, terry.rudd@....com,
        "Paul E. McKenney" <paulmck@...ibm.com>, jason.low2@...com
Subject: [PATCH v4] locking/mutex: Prevent lock starvation when spinning is
 disabled

Imre reported an issue where threads are getting starved when trying
to acquire a mutex. A thread sleeping on a mutex can be delayed
arbitrarily long because other threads can continually steal the lock
in the fastpath and/or through optimistic spinning.

Waiman has developed patches that allow waiters to return to optimistic
spinning, thus reducing the probability that starvation occurs. However,
Imre still sees this starvation problem in his workloads when optimistic
spinning is disabled.

This patch adds an additional boolean to the mutex that gets used in
the CONFIG_SMP && !CONFIG_MUTEX_SPIN_ON_OWNER case. The flag signifies
whether other threads need to yield to a waiter and gets set when a
waiter spends too much time waiting for the mutex. The threshold is
currently set to 16 wakeups, and once that threshold is exceeded,
other threads must yield to the top waiter. The flag gets cleared
immediately after the top waiter acquires the mutex.

This prevents waiters from getting starved without sacrificing much
performance, as lock stealing is still allowed and only temporarily
disabled when it is detected that a waiter has been waiting
for too long.

Reported-by: Imre Deak <imre.deak@...el.com>
Signed-off-by: Jason Low <jason.low2@....com>
---
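For reference only (not part of the patch): below is a minimal, standalone
sketch of the yield_to_waiter idea built on C11 atomics. The toy_mutex /
toy_lock / toy_unlock names, the spin-based lock and WAKEUP_THRESHOLD are
purely illustrative; unlike the real mutex there is no wait list here, so
setting the flag merely disables the fastpath steal, and the flag is cleared
by whichever waiter wins rather than only by the top waiter.

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

#define WAKEUP_THRESHOLD 16                /* mirrors MUTEX_WAKEUP_THRESHOLD */

struct toy_mutex {
	atomic_int  locked;                /* 0 = unlocked, 1 = locked */
	atomic_bool yield_to_waiter;       /* set once a waiter has waited too long */
};

static bool toy_trylock(struct toy_mutex *m)
{
	int unlocked = 0;

	return atomic_compare_exchange_strong(&m->locked, &unlocked, 1);
}

static void toy_lock(struct toy_mutex *m)
{
	int wakeups = 0;

	/* "Steal" fastpath, skipped once a waiter demands priority. */
	if (!atomic_load(&m->yield_to_waiter) && toy_trylock(m))
		return;

	for (;;) {
		/* Each failed pass stands in for one wakeup on the real mutex. */
		if (++wakeups > WAKEUP_THRESHOLD)
			atomic_store(&m->yield_to_waiter, true);

		if (toy_trylock(m)) {
			/* The starved waiter got the lock; allow stealing again. */
			atomic_store(&m->yield_to_waiter, false);
			return;
		}
		sched_yield();
	}
}

static void toy_unlock(struct toy_mutex *m)
{
	atomic_store(&m->locked, 0);
}

The point is just the shape of the logic: a per-waiter wakeup count, a shared
flag that gates the fastpath, and a clear of that flag on acquisition.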
 include/linux/mutex.h  |   2 +
 kernel/locking/mutex.c | 122 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 99 insertions(+), 25 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f8e91ad..988c020 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -58,6 +58,8 @@ struct mutex {
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	struct optimistic_spin_queue osq; /* Spinner MCS lock */
 	int waiter_spinning;
+#elif defined(CONFIG_SMP)
+	int yield_to_waiter;
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 64a0bfa..e078c49 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -56,6 +56,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
 	lock->waiter_spinning = false;
+#elif defined(CONFIG_SMP)
+	lock->yield_to_waiter = false;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -72,6 +74,9 @@ EXPORT_SYMBOL(__mutex_init);
  */
 __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 
+
+static inline bool need_yield_to_waiter(struct mutex *lock);
+
 /**
  * mutex_lock - acquire the mutex
  * @lock: the mutex to be acquired
@@ -100,7 +105,10 @@ void __sched mutex_lock(struct mutex *lock)
 	 * The locking fastpath is the 1->0 transition from
 	 * 'unlocked' into 'locked' state.
 	 */
-	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	if (!need_yield_to_waiter(lock))
+		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	else
+		__mutex_lock_slowpath(&lock->count);
 	mutex_set_owner(lock);
 }
 
@@ -449,6 +457,49 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 }
 #endif
 
+#if !defined(CONFIG_MUTEX_SPIN_ON_OWNER) && defined(CONFIG_SMP)
+
+#define MUTEX_WAKEUP_THRESHOLD 16
+
+static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
+{
+	if (++(*wakeups) > MUTEX_WAKEUP_THRESHOLD && !lock->yield_to_waiter)
+		lock->yield_to_waiter = true;
+}
+
+static inline void clear_yield_to_waiter(struct mutex *lock,
+					 struct mutex_waiter *waiter)
+{
+	/*  Only clear yield_to_waiter if we are the top waiter. */
+	if (lock->wait_list.next == &waiter->list && lock->yield_to_waiter)
+		lock->yield_to_waiter = false;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return unlikely(lock->yield_to_waiter);
+}
+
+#else /* !yield_to_waiter */
+
+static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
+{
+	return;
+}
+
+static inline void clear_yield_to_waiter(struct mutex *lock,
+					 struct mutex_waiter *waiter)
+{
+	return;
+}
+
+static inline bool need_yield_to_waiter(struct mutex *lock)
+{
+	return false;
+}
+
+#endif /* yield_to_waiter */
+
 __visible __used noinline
 void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
 
@@ -541,6 +592,12 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	return 0;
 }
 
+static inline bool __mutex_trylock_pending(struct mutex *lock)
+{
+	return atomic_read(&lock->count) >= 0 &&
+	       atomic_xchg_acquire(&lock->count, -1) == 1;
+}
+
 /*
  * Lock a mutex (possibly interruptible), slowpath:
  */
@@ -553,7 +610,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	struct mutex_waiter waiter;
 	unsigned long flags;
 	bool  acquired = false;	/* True if the lock is acquired */
-	int ret;
+	int ret, wakeups = 0;
 
 	if (use_ww_ctx) {
 		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -576,7 +633,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 * Once more, try to acquire the lock. Only try-lock the mutex if
 	 * it is unlocked to reduce unnecessary xchg() operations.
 	 */
-	if (!mutex_is_locked(lock) &&
+	if (!need_yield_to_waiter(lock) && !mutex_is_locked(lock) &&
 	    (atomic_xchg_acquire(&lock->count, 0) == 1))
 		goto skip_wait;
 
@@ -587,24 +644,18 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	list_add_tail(&waiter.list, &lock->wait_list);
 	waiter.task = task;
 
+	/*
+	 * If this is the first waiter, mark the lock as having pending
+	 * waiters, if we happen to acquire it while doing so, yay!
+	 */
+	if (list_is_singular(&lock->wait_list) &&
+	    __mutex_trylock_pending(lock))
+		goto remove_waiter;
+
 	lock_contended(&lock->dep_map, ip);
 
 	while (!acquired) {
 		/*
-		 * Lets try to take the lock again - this is needed even if
-		 * we get here for the first time (shortly after failing to
-		 * acquire the lock), to make sure that we get a wakeup once
-		 * it's unlocked. Later on, if we sleep, this is the
-		 * operation that gives us the lock. We xchg it to -1, so
-		 * that when we release the lock, we properly wake up the
-		 * other waiters. We only attempt the xchg if the count is
-		 * non-negative in order to avoid unnecessary xchg operations:
-		 */
-		if (atomic_read(&lock->count) >= 0 &&
-		    (atomic_xchg_acquire(&lock->count, -1) == 1))
-			break;
-
-		/*
 		 * got a signal? (This code gets eliminated in the
 		 * TASK_UNINTERRUPTIBLE case.)
 		 */
@@ -631,9 +682,21 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		acquired = mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx,
 						 true);
 		spin_lock_mutex(&lock->wait_lock, flags);
+
+		update_yield_to_waiter(lock, &wakeups);
+
+		/*
+		 * Try-acquire now that we got woken at the head of the queue
+		 * or we received a signal.
+		 */
+		if (__mutex_trylock_pending(lock))
+			break;
 	}
 	__set_task_state(task, TASK_RUNNING);
 
+	clear_yield_to_waiter(lock, &waiter);
+
+remove_waiter:
 	mutex_remove_waiter(lock, &waiter, task);
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
@@ -659,6 +722,7 @@ unlock:
 	return 0;
 
 err:
+	clear_yield_to_waiter(lock, &waiter);
 	mutex_remove_waiter(lock, &waiter, task);
 	spin_unlock_mutex(&lock->wait_lock, flags);
 	debug_mutex_free_waiter(&waiter);
@@ -843,10 +907,13 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret =  __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret =  __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -858,10 +925,13 @@ EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
-	ret = __mutex_fastpath_lock_retval(&lock->count);
+
+	if (!need_yield_to_waiter(lock))
+		ret = __mutex_fastpath_lock_retval(&lock->count);
+
 	if (likely(!ret)) {
 		mutex_set_owner(lock);
 		return 0;
@@ -971,11 +1041,12 @@ EXPORT_SYMBOL(mutex_trylock);
 int __sched
 __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
+	if (!need_yield_to_waiter(&lock->base))
+		ret = __mutex_fastpath_lock_retval(&lock->base.count);
 
 	if (likely(!ret)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
@@ -989,11 +1060,12 @@ EXPORT_SYMBOL(__ww_mutex_lock);
 int __sched
 __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	int ret;
+	int ret = 1;
 
 	might_sleep();
 
-	ret = __mutex_fastpath_lock_retval(&lock->base.count);
+	if (!need_yield_to_waiter(&lock->base))
+		ret = __mutex_fastpath_lock_retval(&lock->base.count);
 
 	if (likely(!ret)) {
 		ww_mutex_set_context_fastpath(lock, ctx);
-- 
2.1.4


