linux-kernel - Re: [PATCH v4] locking/mutex: Prevent lock starvation when spinning is disabled

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <57B73A62.9020901@hpe.com>
Date:   Fri, 19 Aug 2016 12:57:06 -0400
From:   Waiman Long <waiman.long@....com>
To:     Jason Low <jason.low2@....com>
CC:     Peter Zijlstra <peterz@...radead.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Ding Tianhong <dingtianhong@...wei.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Will Deacon <Will.Deacon@....com>,
        Ingo Molnar <mingo@...hat.com>, <imre.deak@...el.com>,
        <linux-kernel@...r.kernel.org>,
        Davidlohr Bueso <dave@...olabs.net>,
        Tim Chen <tim.c.chen@...ux.intel.com>, <terry.rudd@....com>,
        "Paul E. McKenney" <paulmck@...ibm.com>, <jason.low2@...com>
Subject: Re: [PATCH v4] locking/mutex: Prevent lock starvation when spinning
 is disabled

On 08/18/2016 08:39 PM, Jason Low wrote:
> Imre reported an issue where threads are getting starved when trying
> to acquire a mutex. Threads acquiring a mutex can get arbitrarily delayed
> sleeping on a mutex because other threads can continually steal the lock
> in the fastpath and/or through optimistic spinning.
>
> Waiman has developed patches that allow waiters to return to optimistic
> spinning, thus reducing the probability that starvation occurs. However,
> Imre still sees this starvation problem in the workloads when optimistic
> spinning is disabled.
>
> This patch adds an additional boolean to the mutex that gets used in
> the CONFIG_SMP&&  !CONFIG_MUTEX_SPIN_ON_OWNER cases. The flag signifies
> whether or not other threads need to yield to a waiter and gets set
> when a waiter spends too much time waiting for the mutex. The threshold
> is currently set to 16 wakeups, and once the wakeup threshold is exceeded,
> other threads must yield to the top waiter. The flag gets cleared
> immediately after the top waiter acquires the mutex.
>
> This prevents waiters from getting starved without sacrificing much
> much performance, as lock stealing is still allowed and only
> temporarily disabled when it is detected that a waiter has been waiting
> for too long.
>
> Reported-by: Imre Deak<imre.deak@...el.com>
> Signed-off-by: Jason Low<jason.low2@....com>
> ---
>   include/linux/mutex.h  |   2 +
>   kernel/locking/mutex.c | 122 +++++++++++++++++++++++++++++++++++++++----------
>   2 files changed, 99 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index f8e91ad..988c020 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -58,6 +58,8 @@ struct mutex {
>   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>   	struct optimistic_spin_queue osq; /* Spinner MCS lock */
>   	int waiter_spinning;
> +#elif defined(CONFIG_SMP)
> +	int yield_to_waiter;
>   #endif
>   #ifdef CONFIG_DEBUG_MUTEXES
>   	void			*magic;
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 64a0bfa..e078c49 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -56,6 +56,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
>   #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
>   	osq_lock_init(&lock->osq);
>   	lock->waiter_spinning = false;
> +#elif defined(CONFIG_SMP)
> +	lock->yield_to_waiter = false;
>   #endif
>
>   	debug_mutex_init(lock, name, key);
> @@ -72,6 +74,9 @@ EXPORT_SYMBOL(__mutex_init);
>    */
>   __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock);
> +
>   /**
>    * mutex_lock - acquire the mutex
>    * @lock: the mutex to be acquired
> @@ -100,7 +105,10 @@ void __sched mutex_lock(struct mutex *lock)
>   	 * The locking fastpath is the 1->0 transition from
>   	 * 'unlocked' into 'locked' state.
>   	 */
> -	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	if (!need_yield_to_waiter(lock))
> +		__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> +	else
> +		__mutex_lock_slowpath(&lock->count);
>   	mutex_set_owner(lock);
>   }
>
> @@ -449,6 +457,49 @@ static bool mutex_optimistic_spin(struct mutex *lock,
>   }
>   #endif
>
> +#if !defined(CONFIG_MUTEX_SPIN_ON_OWNER)&&  defined(CONFIG_SMP)
> +
> +#define MUTEX_WAKEUP_THRESHOLD 16
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> +	if (++(*wakeups)>  MUTEX_WAKEUP_THRESHOLD&&  !lock->yield_to_waiter)
> +		lock->yield_to_waiter = true;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> +					 struct mutex_waiter *waiter)
> +{
> +	/*  Only clear yield_to_waiter if we are the top waiter. */
> +	if (lock->wait_list.next ==&waiter->list&&  lock->yield_to_waiter)
> +		lock->yield_to_waiter = false;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return unlikely(lock->yield_to_waiter);
> +}
> +
> +#else /* !yield_to_waiter */
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> +	return;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> +					 struct mutex_waiter *waiter)
> +{
> +	return;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> +	return false;
> +}
> +
> +#endif /* yield_to_waiter */
> +
>   __visible __used noinline
>   void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
>
> @@ -541,6 +592,12 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
>   	return 0;
>   }
>
> +static inline bool __mutex_trylock_pending(struct mutex *lock)
> +{
> +	return atomic_read(&lock->count)>= 0&&
> +	       atomic_xchg_acquire(&lock->count, -1) == 1;
> +}
> +

Maybe you can make a more general __mutex_trylock function that is used 
in all three trylock attempts in the slowpath. For example,

static inline bool __mutex_trylock(struct mutex *lock, bool waiter)
{
     if (waiter) {
         return atomic_read(&lock->count) >= 0 &&
                atomic_xchg_acquire(&lock->count, -1) == 1;
     } else {
         return !need_yield_to_waiter(lock) &&
                !mutex_is_locked(lock) &&
                ((atomic_xchg_acquire(&lock->count, 0) == 1);
     }
}
>   /*
>    * Lock a mutex (possibly interruptible), slowpath:
>    */
> @@ -553,7 +610,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	struct mutex_waiter waiter;
>   	unsigned long flags;
>   	bool  acquired = false;	/* True if the lock is acquired */
> -	int ret;
> +	int ret, wakeups = 0;
>
>   	if (use_ww_ctx) {
>   		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> @@ -576,7 +633,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	 * Once more, try to acquire the lock. Only try-lock the mutex if
>   	 * it is unlocked to reduce unnecessary xchg() operations.
>   	 */
> -	if (!mutex_is_locked(lock)&&
> +	if (!need_yield_to_waiter(lock)&&  !mutex_is_locked(lock)&&
>   	(atomic_xchg_acquire(&lock->count, 0) == 1))
>   		goto skip_wait;
>
> @@ -587,24 +644,18 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   	list_add_tail(&waiter.list,&lock->wait_list);
>   	waiter.task = task;
>
> +	/*
> +	 * If this is the first waiter, mark the lock as having pending
> +	 * waiters, if we happen to acquire it while doing so, yay!
> +	 */
> +	if (list_is_singular(&lock->wait_list)&&
> +	    __mutex_trylock_pending(lock))
> +		goto remove_waiter;
> +
>   	lock_contended(&lock->dep_map, ip);
>
>   	while (!acquired) {
>   		/*
> -		 * Lets try to take the lock again - this is needed even if
> -		 * we get here for the first time (shortly after failing to
> -		 * acquire the lock), to make sure that we get a wakeup once
> -		 * it's unlocked. Later on, if we sleep, this is the
> -		 * operation that gives us the lock. We xchg it to -1, so
> -		 * that when we release the lock, we properly wake up the
> -		 * other waiters. We only attempt the xchg if the count is
> -		 * non-negative in order to avoid unnecessary xchg operations:
> -		 */
> -		if (atomic_read(&lock->count)>= 0&&
> -		    (atomic_xchg_acquire(&lock->count, -1) == 1))
> -			break;
> -
> -		/*
>   		 * got a signal? (This code gets eliminated in the
>   		 * TASK_UNINTERRUPTIBLE case.)
>   		 */
> @@ -631,9 +682,21 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>   		acquired = mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx,
>   						 true);
>   		spin_lock_mutex(&lock->wait_lock, flags);
> +
> +		update_yield_to_waiter(lock,&wakeups);
> +
> +		/*
> +		 * Try-acquire now that we got woken at the head of the queue
> +		 * or we received a signal.
> +		 */
> +		if (__mutex_trylock_pending(lock))
> +			break;

That is not quite right. The lock may have been acquired in the 
optimistic spinning loop. You either have to move it back to the top or 
add a "!acquired" check before the trylock.

Cheers,
Longman