This patch adds the priority inheritance (PI) to the read / write locks. When a task is blocked on the lock that eventually is owned by a reader in the PI chain, it will boost all the readers if they are of lower priority than the blocked task. Signed-off-by: Steven Rostedt --- include/linux/init_task.h | 8 +++ include/linux/rt_lock.h | 4 + kernel/fork.c | 1 kernel/rtmutex.c | 115 ++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 118 insertions(+), 10 deletions(-) Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h =================================================================== --- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 23:13:17.000000000 -0400 +++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 23:14:47.000000000 -0400 @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_PREEMPT_RT /* @@ -66,6 +67,7 @@ struct rw_mutex { atomic_t count; /* number of times held for read */ atomic_t owners; /* number of owners as readers */ struct list_head readers; + int prio; }; /* @@ -98,6 +100,7 @@ typedef struct { #define __RW_LOCK_UNLOCKED(name) (rwlock_t) \ { .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex), \ + .owners.prio = MAX_PRIO, \ RW_DEP_MAP_INIT(name) } #else /* !PREEMPT_RT */ @@ -196,6 +199,7 @@ extern int __bad_func_type(void); #define __RWSEM_INITIALIZER(name) \ { .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \ + .owners.prio = MAX_PRIO, \ RW_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(lockname) \ Index: linux-2.6.24.4-rt4/kernel/rtmutex.c =================================================================== --- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 23:13:17.000000000 -0400 +++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 23:14:47.000000000 -0400 @@ -133,6 +133,8 @@ static inline void init_lists(struct rt_ } } +static int rt_mutex_get_readers_prio(struct task_struct *task, int prio); + /* * Calculate task priority from the waiter list priority * @@ -143,6 +145,8 @@ int 
rt_mutex_getprio(struct task_struct { int prio = min(task->normal_prio, get_rcu_prio(task)); + prio = rt_mutex_get_readers_prio(task, prio); + if (likely(!task_has_pi_waiters(task))) return prio; @@ -185,6 +189,11 @@ static void rt_mutex_adjust_prio(struct */ int max_lock_depth = 1024; +static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task, + struct rt_mutex *lock, + int recursion_depth); /* * Adjust the priority chain. Also used for deadlock detection. * Decreases task's usage by one - may thus free the task. @@ -194,7 +203,8 @@ static int rt_mutex_adjust_prio_chain(st int deadlock_detect, struct rt_mutex *orig_lock, struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task) + struct task_struct *top_task, + int recursion_depth) { struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; @@ -296,8 +306,13 @@ static int rt_mutex_adjust_prio_chain(st /* Grab the next task */ task = rt_mutex_owner(lock); - /* Writers do not boost their readers. */ + /* + * Readers are special. We may need to boost more than one owner. 
+ */ if (task == RT_RW_READER) { + ret = rt_mutex_adjust_readers(orig_lock, orig_waiter, + top_task, lock, + recursion_depth); spin_unlock_irqrestore(&lock->wait_lock, flags); goto out; } @@ -479,9 +494,12 @@ static int task_blocks_on_rt_mutex(struc spin_unlock(&current->pi_lock); if (waiter == rt_mutex_top_waiter(lock)) { - /* readers are not handled */ - if (owner == RT_RW_READER) - return 0; + /* readers are handled differently */ + if (owner == RT_RW_READER) { + res = rt_mutex_adjust_readers(lock, waiter, + current, lock, 0); + return res; + } spin_lock(&owner->pi_lock); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); @@ -508,7 +526,7 @@ static int task_blocks_on_rt_mutex(struc spin_unlock_irqrestore(&lock->wait_lock, flags); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - current); + current, 0); spin_lock_irq(&lock->wait_lock); @@ -625,7 +643,7 @@ static void remove_waiter(struct rt_mute spin_unlock_irqrestore(&lock->wait_lock, flags); - rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); + rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0); spin_lock_irq(&lock->wait_lock); } @@ -652,7 +670,7 @@ void rt_mutex_adjust_pi(struct task_stru get_task_struct(task); spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); + rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0); } /* @@ -1088,7 +1106,6 @@ static int try_to_take_rw_read(struct rw if (rt_rwlock_pending_writer(rwm)) return 0; if (rt_mutex_has_waiters(mutex)) { - /* readers don't do PI */ waiter = rt_mutex_top_waiter(mutex); if (current->prio >= waiter->task->prio) return 0; @@ -1102,7 +1119,7 @@ static int try_to_take_rw_read(struct rw spin_unlock(&mtxowner->pi_lock); } } else if (rt_mutex_has_waiters(mutex)) { - /* Readers don't do PI */ + /* Readers do things differently with respect to PI */ waiter = rt_mutex_top_waiter(mutex); spin_lock(&current->pi_lock); plist_del(&waiter->pi_list_entry, &current->pi_waiters); @@
-1608,6 +1625,7 @@ rt_read_slowunlock(struct rw_mutex *rwm, /* If no one is blocked, then clear all ownership */ if (!rt_mutex_has_waiters(mutex)) { + rwm->prio = MAX_PRIO; /* * If count is not zero, we are under the limit with * no other readers. @@ -1838,11 +1856,88 @@ void rt_mutex_rwsem_init(struct rw_mutex rwm->owner = NULL; atomic_set(&rwm->count, 0); atomic_set(&rwm->owners, 0); + rwm->prio = MAX_PRIO; INIT_LIST_HEAD(&rwm->readers); __rt_mutex_init(mutex, name); } +static int rt_mutex_get_readers_prio(struct task_struct *task, int prio) +{ + struct reader_lock_struct *rls; + struct rw_mutex *rwm; + int lock_prio; + int i; + + for (i = 0; i < task->reader_lock_count; i++) { + rls = &task->owned_read_locks[i]; + rwm = rls->lock; + if (rwm) { + lock_prio = rwm->prio; + if (prio > lock_prio) + prio = lock_prio; + } + } + + return prio; +} + +static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task, + struct rt_mutex *lock, + int recursion_depth) +{ + struct reader_lock_struct *rls; + struct rt_mutex_waiter *waiter; + struct task_struct *task; + struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex); + + if (rt_mutex_has_waiters(lock)) { + waiter = rt_mutex_top_waiter(lock); + /* + * Do we need to grab the task->pi_lock? + * Really, we are only reading it. If it + * changes, then that should follow this chain + * too. 
+ */ + rwm->prio = waiter->task->prio; + } else + rwm->prio = MAX_PRIO; + + if (recursion_depth >= MAX_RWLOCK_DEPTH) { + WARN_ON(1); + return 1; + } + + list_for_each_entry(rls, &rwm->readers, list) { + task = rls->task; + get_task_struct(task); + /* + * rt_mutex_adjust_prio_chain will do + * the put_task_struct + */ + rt_mutex_adjust_prio_chain(task, 0, orig_lock, + orig_waiter, top_task, + recursion_depth+1); + } + + return 0; +} +#else +static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task, + struct rt_mutex *lock, + int recursion_depth) +{ + return 0; +} + +static int rt_mutex_get_readers_prio(struct task_struct *task, int prio) +{ + return prio; +} #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_PREEMPT_BKL Index: linux-2.6.24.4-rt4/include/linux/init_task.h =================================================================== --- linux-2.6.24.4-rt4.orig/include/linux/init_task.h 2008-03-25 16:41:47.000000000 -0400 +++ linux-2.6.24.4-rt4/include/linux/init_task.h 2008-03-25 23:14:47.000000000 -0400 @@ -99,6 +99,13 @@ extern struct nsproxy init_nsproxy; #define INIT_PREEMPT_RCU_BOOST(tsk) #endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */ +#ifdef CONFIG_PREEMPT_RT +# define INIT_RW_OWNERS(tsk) .owned_read_locks = { \ + [0 ... 
(MAX_RWLOCK_DEPTH - 1) ] = { .task = &tsk } }, +#else +# define INIT_RW_OWNERS(tsk) +#endif + extern struct group_info init_groups; #define INIT_STRUCT_PID { \ @@ -189,6 +196,7 @@ extern struct group_info init_groups; INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_PREEMPT_RCU_BOOST(tsk) \ + INIT_RW_OWNERS(tsk) \ } Index: linux-2.6.24.4-rt4/kernel/fork.c =================================================================== --- linux-2.6.24.4-rt4.orig/kernel/fork.c 2008-03-25 23:13:17.000000000 -0400 +++ linux-2.6.24.4-rt4/kernel/fork.c 2008-03-25 23:14:47.000000000 -0400 @@ -1214,6 +1214,7 @@ static struct task_struct *copy_process( INIT_LIST_HEAD(&p->owned_read_locks[i].list); p->owned_read_locks[i].count = 0; p->owned_read_locks[i].lock = NULL; + p->owned_read_locks[i].task = p; } } #endif -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/