Add support for priority queueing and priority inheritance to the
workqueue infrastructure. This is done by replacing the linear
linked-list worklist with a priority-sorted plist. The drawback is that
this breaks the workqueue barrier that is needed to support
flush_workqueue() and wait_on_work().

Signed-off-by: Daniel Walker
Signed-off-by: Peter Zijlstra

---
 include/linux/workqueue.h |    7 ++++---
 kernel/power/poweroff.c   |    1 +
 kernel/sched.c            |    4 ----
 kernel/workqueue.c        |   40 +++++++++++++++++++++++++---------------
 4 files changed, 30 insertions(+), 22 deletions(-)

Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/plist.h>
 #include <asm/atomic.h>

 struct workqueue_struct;
@@ -26,7 +27,7 @@ struct work_struct {
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
 #define WORK_STRUCT_FLAG_MASK (3UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
-	struct list_head entry;
+	struct plist_node entry;
 	work_func_t func;
 };

@@ -43,7 +44,7 @@ struct execute_work {

 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
-	.entry	= { &(n).entry, &(n).entry },			\
+	.entry	= PLIST_NODE_INIT(n.entry, MAX_PRIO),		\
 	.func = (f),						\
 	}

@@ -79,7 +80,7 @@ struct execute_work {

 #define INIT_WORK(_work, _func)					\
 	do {							\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();\
-		INIT_LIST_HEAD(&(_work)->entry);		\
+		plist_node_init(&(_work)->entry, -1);		\
 		PREPARE_WORK((_work), (_func));			\
 	} while (0)

Index: linux-2.6/kernel/power/poweroff.c
===================================================================
--- linux-2.6.orig/kernel/power/poweroff.c
+++ linux-2.6/kernel/power/poweroff.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/sysrq.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/pm.h>
 #include <linux/workqueue.h>

Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -44,7 +44,7 @@ struct cpu_workqueue_struct {

 	spinlock_t lock;

-	struct list_head worklist;
+	struct plist_head worklist;
 	wait_queue_head_t more_work;
 	struct work_struct *current_work;

@@ -127,16 +127,19 @@ struct cpu_workqueue_struct *get_wq_data
 static void insert_work(struct cpu_workqueue_struct *cwq,
 				struct work_struct *work, int tail)
 {
+	int prio = current->normal_prio;
+
 	set_wq_data(work, cwq);
 	/*
 	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
 	 */
 	smp_wmb();
-	if (tail)
-		list_add_tail(&work->entry, &cwq->worklist);
-	else
-		list_add(&work->entry, &cwq->worklist);
+	plist_node_init(&work->entry, prio);
+	plist_add(&work->entry, &cwq->worklist);
+
+	if (prio < cwq->thread->prio)
+		task_setprio(cwq->thread, prio);
 	wake_up(&cwq->more_work);
 }

@@ -168,7 +171,7 @@ int fastcall queue_work(struct workqueue
 	int ret = 0, cpu = raw_smp_processor_id();

 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-		BUG_ON(!list_empty(&work->entry));
+		BUG_ON(!plist_node_empty(&work->entry));
 		__queue_work(wq_per_cpu(wq, cpu), work);
 		ret = 1;
 	}
@@ -222,7 +225,7 @@ int queue_delayed_work_on(int cpu, struc

 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(timer_pending(timer));
-		BUG_ON(!list_empty(&work->entry));
+		BUG_ON(!plist_node_empty(&work->entry));

 		/* This stores cwq for the moment, for the timer_fn */
 		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
@@ -264,13 +267,17 @@ static void run_workqueue(struct cpu_wor
 			__FUNCTION__, cwq->run_depth);
 		dump_stack();
 	}
-	while (!list_empty(&cwq->worklist)) {
-		struct work_struct *work = list_entry(cwq->worklist.next,
+	while (!plist_head_empty(&cwq->worklist)) {
+		struct work_struct *work = plist_first_entry(&cwq->worklist,
 						struct work_struct, entry);
 		work_func_t f = work->func;

+		if (likely(cwq->thread->prio != work->entry.prio))
+			task_setprio(cwq->thread, work->entry.prio);
+
 		cwq->current_work = work;
-		list_del_init(cwq->worklist.next);
+		plist_del(&work->entry, &cwq->worklist);
+		plist_node_init(&work->entry, MAX_PRIO);
 		spin_unlock_irq(&cwq->lock);

 		BUG_ON(get_wq_data(work) != cwq);
@@ -283,6 +290,7 @@ static void run_workqueue(struct cpu_wor
 		spin_lock_irq(&cwq->lock);
 		cwq->current_work = NULL;
 	}
+	task_setprio(cwq->thread, current->normal_prio);
 	cwq->run_depth--;
 	spin_unlock_irq(&cwq->lock);
 }
@@ -301,7 +309,7 @@ static int worker_thread(void *__cwq)
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
 		if (!freezing(current) &&
 		    !kthread_should_stop() &&
-		    list_empty(&cwq->worklist))
+		    plist_head_empty(&cwq->worklist))
 			schedule();
 		finish_wait(&cwq->more_work, &wait);

@@ -354,7 +362,8 @@ static int flush_cpu_workqueue(struct cp

 		active = 0;
 		spin_lock_irq(&cwq->lock);
-		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
+		if (!plist_head_empty(&cwq->worklist) ||
+				cwq->current_work != NULL) {
 			insert_wq_barrier(cwq, &barr, 1);
 			active = 1;
 		}
@@ -413,7 +422,7 @@ static int try_to_grab_pending(struct wo
 		return ret;

 	spin_lock_irq(&cwq->lock);
-	if (!list_empty(&work->entry)) {
+	if (!plist_node_empty(&work->entry)) {
 		/*
 		 * This work is queued, but perhaps we locked the wrong cwq.
 		 * In that case we must see the new value after rmb(), see
@@ -421,7 +430,8 @@ static int try_to_grab_pending(struct wo
 		 */
 		smp_rmb();
 		if (cwq == get_wq_data(work)) {
-			list_del_init(&work->entry);
+			plist_del(&work->entry, &cwq->worklist);
+			plist_node_init(&work->entry, MAX_PRIO);
 			ret = 1;
 		}
 	}
@@ -747,7 +757,7 @@ init_cpu_workqueue(struct workqueue_stru

 	cwq->wq = wq;
 	spin_lock_init(&cwq->lock);
-	INIT_LIST_HEAD(&cwq->worklist);
+	plist_head_init(&cwq->worklist, NULL);
 	init_waitqueue_head(&cwq->more_work);

 	return cwq;
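
A note for reviewers who have not used plists before: <linux/plist.h>
keeps nodes sorted by ascending ->prio (lower value means higher
priority, the same convention as task->prio), and nodes of equal
priority stay in FIFO order, so plist_first_entry() in run_workqueue()
above always hands the worker the highest-priority pending item. The
stand-alone userspace sketch below models just that ordering; it is
illustrative only, and the model_* names are invented for the sketch
rather than taken from the kernel API.

/* Userspace model of the priority-sorted worklist; illustrative only.
 * The model_* names are made up for this sketch, not kernel API. */
#include <stdio.h>
#include <stdlib.h>

struct model_node {
	int prio;			/* lower value = higher priority */
	struct model_node *next;
};

/* Insert sorted by ascending prio; an equal-prio node goes after the
 * existing ones, mirroring the FIFO tie-break plist_add() provides. */
static void model_plist_add(struct model_node **head, struct model_node *node)
{
	while (*head && (*head)->prio <= node->prio)
		head = &(*head)->next;
	node->next = *head;
	*head = node;
}

int main(void)
{
	struct model_node *head = NULL;
	int prios[] = { 120, 100, 139, 100 };	/* normal_prio-style values */
	size_t i;

	for (i = 0; i < sizeof(prios) / sizeof(prios[0]); i++) {
		struct model_node *n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->prio = prios[i];
		model_plist_add(&head, n);
	}

	/* Dequeue like run_workqueue(): lowest prio value first. */
	while (head) {
		struct model_node *n = head;
		head = n->next;
		printf("%d\n", n->prio);
		free(n);
	}
	return 0;
}

Compiled with gcc -Wall model_plist.c, this prints 100 100 120 139: the
two prio-100 entries come out in submission order, which is how the
plist-based worklist preserves FIFO semantics among equal-priority
submitters while letting higher-priority work jump the queue.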