[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <57B73A62.9020901@hpe.com>
Date: Fri, 19 Aug 2016 12:57:06 -0400
From: Waiman Long <waiman.long@....com>
To: Jason Low <jason.low2@....com>
CC: Peter Zijlstra <peterz@...radead.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Ding Tianhong <dingtianhong@...wei.com>,
Thomas Gleixner <tglx@...utronix.de>,
Will Deacon <Will.Deacon@....com>,
Ingo Molnar <mingo@...hat.com>, <imre.deak@...el.com>,
<linux-kernel@...r.kernel.org>,
Davidlohr Bueso <dave@...olabs.net>,
Tim Chen <tim.c.chen@...ux.intel.com>, <terry.rudd@....com>,
"Paul E. McKenney" <paulmck@...ibm.com>, <jason.low2@...com>
Subject: Re: [PATCH v4] locking/mutex: Prevent lock starvation when spinning
is disabled
On 08/18/2016 08:39 PM, Jason Low wrote:
> Imre reported an issue where threads are getting starved when trying
> to acquire a mutex. Threads acquiring a mutex can get arbitrarily delayed
> sleeping on a mutex because other threads can continually steal the lock
> in the fastpath and/or through optimistic spinning.
>
> Waiman has developed patches that allow waiters to return to optimistic
> spinning, thus reducing the probability that starvation occurs. However,
> Imre still sees this starvation problem in the workloads when optimistic
> spinning is disabled.
>
> This patch adds an additional boolean to the mutex that gets used in
> the CONFIG_SMP && !CONFIG_MUTEX_SPIN_ON_OWNER cases. The flag signifies
> whether or not other threads need to yield to a waiter and gets set
> when a waiter spends too much time waiting for the mutex. The threshold
> is currently set to 16 wakeups, and once the wakeup threshold is exceeded,
> other threads must yield to the top waiter. The flag gets cleared
> immediately after the top waiter acquires the mutex.
>
> This prevents waiters from getting starved without sacrificing
> much performance, as lock stealing is still allowed and only
> temporarily disabled when it is detected that a waiter has been waiting
> for too long.
>
> Reported-by: Imre Deak <imre.deak@...el.com>
> Signed-off-by: Jason Low <jason.low2@....com>
> ---
> include/linux/mutex.h | 2 +
> kernel/locking/mutex.c | 122 +++++++++++++++++++++++++++++++++++++++----------
> 2 files changed, 99 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index f8e91ad..988c020 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -58,6 +58,8 @@ struct mutex {
> #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> struct optimistic_spin_queue osq; /* Spinner MCS lock */
> int waiter_spinning;
> +#elif defined(CONFIG_SMP)
> + int yield_to_waiter;
> #endif
> #ifdef CONFIG_DEBUG_MUTEXES
> void *magic;
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 64a0bfa..e078c49 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -56,6 +56,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> osq_lock_init(&lock->osq);
> lock->waiter_spinning = false;
> +#elif defined(CONFIG_SMP)
> + lock->yield_to_waiter = false;
> #endif
>
> debug_mutex_init(lock, name, key);
> @@ -72,6 +74,9 @@ EXPORT_SYMBOL(__mutex_init);
> */
> __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock);
> +
> /**
> * mutex_lock - acquire the mutex
> * @lock: the mutex to be acquired
> @@ -100,7 +105,10 @@ void __sched mutex_lock(struct mutex *lock)
> * The locking fastpath is the 1->0 transition from
> * 'unlocked' into 'locked' state.
> */
> - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> + if (!need_yield_to_waiter(lock))
> + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> + else
> + __mutex_lock_slowpath(&lock->count);
> mutex_set_owner(lock);
> }
>
> @@ -449,6 +457,49 @@ static bool mutex_optimistic_spin(struct mutex *lock,
> }
> #endif
>
> +#if !defined(CONFIG_MUTEX_SPIN_ON_OWNER)&& defined(CONFIG_SMP)
> +
> +#define MUTEX_WAKEUP_THRESHOLD 16
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> + if (++(*wakeups)> MUTEX_WAKEUP_THRESHOLD&& !lock->yield_to_waiter)
> + lock->yield_to_waiter = true;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> + struct mutex_waiter *waiter)
> +{
> + /* Only clear yield_to_waiter if we are the top waiter. */
> + if (lock->wait_list.next ==&waiter->list&& lock->yield_to_waiter)
> + lock->yield_to_waiter = false;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> + return unlikely(lock->yield_to_waiter);
> +}
> +
> +#else /* !yield_to_waiter */
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> + return;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> + struct mutex_waiter *waiter)
> +{
> + return;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> + return false;
> +}
> +
> +#endif /* yield_to_waiter */
> +
> __visible __used noinline
> void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
>
> @@ -541,6 +592,12 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
> return 0;
> }
>
> +static inline bool __mutex_trylock_pending(struct mutex *lock)
> +{
> + return atomic_read(&lock->count)>= 0&&
> + atomic_xchg_acquire(&lock->count, -1) == 1;
> +}
> +
Maybe you can make a more general __mutex_trylock function that is used
in all three trylock attempts in the slowpath. For example,
/*
 * Try once to take @lock without sleeping; a common helper for the
 * trylock attempts in the mutex slowpath.
 *
 * @lock:   the mutex to try to acquire
 * @waiter: true if the caller is already queued on the lock's wait list
 *
 * A queued waiter exchanges the count to -1, so that the eventual unlock
 * properly wakes up the other waiters. The exchange is only attempted
 * when the count is non-negative, to avoid unnecessary xchg operations.
 *
 * A non-waiter backs off without touching the count when it has been
 * asked to yield to a waiter or when the mutex is currently locked;
 * otherwise it exchanges the count to 0.
 *
 * Returns true if the count was 1 (unlocked) before the exchange, i.e.
 * the caller now holds the lock.
 */
static inline bool __mutex_trylock(struct mutex *lock, bool waiter)
{
	if (waiter) {
		return atomic_read(&lock->count) >= 0 &&
		       atomic_xchg_acquire(&lock->count, -1) == 1;
	} else {
		return !need_yield_to_waiter(lock) &&
		       !mutex_is_locked(lock) &&
		       atomic_xchg_acquire(&lock->count, 0) == 1;
	}
}
> /*
> * Lock a mutex (possibly interruptible), slowpath:
> */
> @@ -553,7 +610,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> struct mutex_waiter waiter;
> unsigned long flags;
> bool acquired = false; /* True if the lock is acquired */
> - int ret;
> + int ret, wakeups = 0;
>
> if (use_ww_ctx) {
> struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> @@ -576,7 +633,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> * Once more, try to acquire the lock. Only try-lock the mutex if
> * it is unlocked to reduce unnecessary xchg() operations.
> */
> - if (!mutex_is_locked(lock)&&
> + if (!need_yield_to_waiter(lock)&& !mutex_is_locked(lock)&&
> (atomic_xchg_acquire(&lock->count, 0) == 1))
> goto skip_wait;
>
> @@ -587,24 +644,18 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> list_add_tail(&waiter.list,&lock->wait_list);
> waiter.task = task;
>
> + /*
> + * If this is the first waiter, mark the lock as having pending
> + * waiters, if we happen to acquire it while doing so, yay!
> + */
> + if (list_is_singular(&lock->wait_list)&&
> + __mutex_trylock_pending(lock))
> + goto remove_waiter;
> +
> lock_contended(&lock->dep_map, ip);
>
> while (!acquired) {
> /*
> - * Lets try to take the lock again - this is needed even if
> - * we get here for the first time (shortly after failing to
> - * acquire the lock), to make sure that we get a wakeup once
> - * it's unlocked. Later on, if we sleep, this is the
> - * operation that gives us the lock. We xchg it to -1, so
> - * that when we release the lock, we properly wake up the
> - * other waiters. We only attempt the xchg if the count is
> - * non-negative in order to avoid unnecessary xchg operations:
> - */
> - if (atomic_read(&lock->count)>= 0&&
> - (atomic_xchg_acquire(&lock->count, -1) == 1))
> - break;
> -
> - /*
> * got a signal? (This code gets eliminated in the
> * TASK_UNINTERRUPTIBLE case.)
> */
> @@ -631,9 +682,21 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> acquired = mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx,
> true);
> spin_lock_mutex(&lock->wait_lock, flags);
> +
> + update_yield_to_waiter(lock,&wakeups);
> +
> + /*
> + * Try-acquire now that we got woken at the head of the queue
> + * or we received a signal.
> + */
> + if (__mutex_trylock_pending(lock))
> + break;
That is not quite right. The lock may have already been acquired in the
optimistic spinning loop. You either have to move the trylock back to the
top of the loop or add a "!acquired" check before the trylock.
Cheers,
Longman
Powered by blists - more mailing lists