Now that we have an atomic owner field, we can do explicit lock handoff.
Use this to avoid starvation.

Signed-off-by: Peter Zijlstra (Intel)
---
 kernel/locking/mutex.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 6 deletions(-)

--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -52,6 +52,7 @@ __mutex_init(struct mutex *lock, const c
 EXPORT_SYMBOL(__mutex_init);
 
 #define MUTEX_FLAG_WAITERS	0x01
+#define MUTEX_FLAG_HANDOFF	0x02
 
 #define MUTEX_FLAGS		0x03
 
@@ -117,6 +118,33 @@ static inline void __mutex_clear_flag(st
 	atomic_long_andnot(flag, &lock->owner);
 }
 
+static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter)
+{
+	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter;
+}
+
+static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
+{
+	unsigned long owner = atomic_long_read(&lock->owner);
+
+	for (;;) {
+		unsigned long old, new;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+#endif
+
+		new = (owner & MUTEX_FLAG_WAITERS);
+		new |= (unsigned long)task;
+
+		old = atomic_long_cmpxchg(&lock->owner, owner, new);
+		if (old == owner)
+			break;
+
+		owner = old;
+	}
+}
+
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
@@ -447,7 +475,7 @@ static bool mutex_optimistic_spin(struct
 }
 #endif
 
-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long owner);
 
 /**
  * mutex_unlock - release the mutex
@@ -468,9 +496,12 @@ void __sched mutex_unlock(struct mutex *
 	DEBUG_LOCKS_WARN_ON(__mutex_owner(lock) != current);
 #endif
 
-	owner = atomic_long_fetch_and(MUTEX_FLAGS, &lock->owner);
+	owner = atomic_long_read(&lock->owner);
+	if (!(owner & MUTEX_FLAG_HANDOFF))
+		owner = atomic_long_fetch_and(MUTEX_FLAGS, &lock->owner);
+
 	if (__owner_flags(owner))
-		__mutex_unlock_slowpath(lock);
+		__mutex_unlock_slowpath(lock, owner);
 }
 EXPORT_SYMBOL(mutex_unlock);
 
@@ -568,7 +599,7 @@ __mutex_lock_common(struct mutex *lock,
 	list_add_tail(&waiter.list, &lock->wait_list);
 	waiter.task = task;
 
-	if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter) {
+	if (__mutex_waiter_is_first(lock, &waiter)) {
 		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 		/*
 		 * We must be sure to set WAITERS before attempting the trylock
@@ -605,13 +636,16 @@ __mutex_lock_common(struct mutex *lock,
 		spin_unlock_mutex(&lock->wait_lock, flags);
 		schedule_preempt_disabled();
 		spin_lock_mutex(&lock->wait_lock, flags);
+
+		if (__mutex_waiter_is_first(lock, &waiter))
+			__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
 	}
 	__set_task_state(task, TASK_RUNNING);
 
 	mutex_remove_waiter(lock, &waiter, task);
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
-		__mutex_clear_flag(lock, MUTEX_FLAG_WAITERS);
+		__mutex_clear_flag(lock, MUTEX_FLAGS);
 
 	debug_mutex_free_waiter(&waiter);
 
@@ -737,8 +771,9 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interr
 /*
  * Release the lock, slowpath:
  */
-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock)
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long owner)
 {
+	struct task_struct *next = NULL;
 	unsigned long flags;
 	WAKE_Q(wake_q);
 
@@ -752,10 +787,15 @@ static noinline void __sched __mutex_unl
 			list_entry(lock->wait_list.next,
 				   struct mutex_waiter, list);
 
+		next = waiter->task;
+
 		debug_mutex_wake_waiter(lock, waiter);
 		wake_q_add(&wake_q, waiter->task);
 	}
 
+	if (owner & MUTEX_FLAG_HANDOFF)
+		__mutex_handoff(lock, next);
+
 	spin_unlock_mutex(&lock->wait_lock, flags);
 	wake_up_q(&wake_q);
 }
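
For reference, a minimal user-space sketch of the owner-word handoff that
__mutex_handoff() performs above, using C11 atomics. It is illustrative
only and not part of the patch: demo_mutex, demo_handoff() and the numeric
"task" values are made-up stand-ins for struct mutex, the
atomic_long_cmpxchg() loop and real task_struct pointers.

#include <stdatomic.h>
#include <stdio.h>

#define DEMO_FLAG_WAITERS	0x01UL	/* wait list non-empty, unlock must issue a wakeup */
#define DEMO_FLAG_HANDOFF	0x02UL	/* top waiter asked for a direct handoff */
#define DEMO_FLAGS		0x03UL

struct demo_mutex {
	/* low two bits are flag bits, the rest names the owner */
	atomic_ulong owner;
};

/* Hand the lock straight to @next; only the WAITERS bit survives. */
static void demo_handoff(struct demo_mutex *lock, unsigned long next)
{
	unsigned long owner = atomic_load(&lock->owner);

	for (;;) {
		unsigned long new = (owner & DEMO_FLAG_WAITERS) | next;

		/*
		 * cmpxchg loop like __mutex_handoff(): retry if the word
		 * changed underneath us; on failure 'owner' is reloaded
		 * with the current value.
		 */
		if (atomic_compare_exchange_weak(&lock->owner, &owner, new))
			break;
	}
}

int main(void)
{
	/* 0x400 stands in for the current owner's task pointer */
	struct demo_mutex lock = {
		.owner = 0x400UL | DEMO_FLAG_WAITERS | DEMO_FLAG_HANDOFF
	};
	unsigned long next = 0x800UL;	/* stand-in for the top waiter */

	demo_handoff(&lock, next);

	/* prints 0x801: new owner plus WAITERS; HANDOFF is consumed */
	printf("owner word: %#lx\n", atomic_load(&lock.owner));
	return 0;
}

As in the patch, the new owner is installed with the WAITERS bit preserved,
so a later unlock by the new owner still takes the slowpath and wakes the
remaining waiters, while the HANDOFF bit is consumed by the handoff itself.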