In order to pass around more than just the IRQ flags, add a rq_flags
structure.

No difference in code generation for the x86_64-defconfig build I
tested.

Signed-off-by: Peter Zijlstra (Intel)
---
 kernel/sched/core.c     |   98 ++++++++++++++++++++++++------------------------
 kernel/sched/deadline.c |    6 +-
 kernel/sched/sched.h    |   14 ++++--
 3 files changed, 62 insertions(+), 56 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -173,7 +173,7 @@ static struct rq *this_rq_lock(void)
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
-struct rq *__task_rq_lock(struct task_struct *p)
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
@@ -197,14 +197,14 @@ struct rq *__task_rq_lock(struct task_st
 /*
  * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  */
-struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(p->pi_lock)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
 	for (;;) {
-		raw_spin_lock_irqsave(&p->pi_lock, *flags);
+		raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
 		/*
@@ -228,7 +228,7 @@ struct rq *task_rq_lock(struct task_stru
 			return rq;
 		}
 		raw_spin_unlock(&rq->lock);
-		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 
 		while (unlikely(task_on_rq_migrating(p)))
 			cpu_relax();
@@ -1147,12 +1147,12 @@ void do_set_cpus_allowed(struct task_str
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
-	unsigned long flags;
-	struct rq *rq;
 	unsigned int dest_cpu;
+	struct rq_flags rf;
+	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
@@ -1181,7 +1181,7 @@ static int __set_cpus_allowed_ptr(struct
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
@@ -1195,7 +1195,7 @@ static int __set_cpus_allowed_ptr(struct
 		lockdep_pin_lock(&rq->lock);
 	}
 out:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	return ret;
 }
@@ -1379,8 +1379,8 @@ int migrate_swap(struct task_struct *cur
  */
 unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
-	unsigned long flags;
 	int running, queued;
+	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
 
@@ -1415,14 +1415,14 @@ unsigned long wait_task_inactive(struct
 		 * lock now, to be *sure*. If we're wrong, we'll
 		 * just go back and repeat.
 		 */
-		rq = task_rq_lock(p, &flags);
+		rq = task_rq_lock(p, &rf);
 		trace_sched_wait_task(p);
 		running = task_running(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -1720,17 +1720,18 @@ ttwu_do_activate(struct rq *rq, struct t
  */
 static int ttwu_remote(struct task_struct *p, int wake_flags)
 {
+	struct rq_flags rf;
 	struct rq *rq;
 	int ret = 0;
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 	if (task_on_rq_queued(p)) {
 		/* check_preempt_curr() may use rq clock */
 		update_rq_clock(rq);
 		ttwu_do_wakeup(rq, p, wake_flags);
 		ret = 1;
 	}
-	__task_rq_unlock(rq);
+	__task_rq_unlock(rq, &rf);
 
 	return ret;
 }
@@ -2483,12 +2484,12 @@ extern void init_dl_bw(struct dl_bw *dl_
  */
 void wake_up_new_task(struct task_struct *p)
 {
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	/* Initialize new task's runnable average */
 	init_entity_runnable_average(&p->se);
+	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 #ifdef CONFIG_SMP
 	/*
 	 * Fork balancing, do it here and not earlier because:
@@ -2500,7 +2501,7 @@ void wake_up_new_task(struct task_struct
 	/* Post initialize new task's util average when its cfs_rq is set */
 	post_init_entity_util_avg(&p->se);
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -2516,7 +2517,7 @@ void wake_up_new_task(struct task_struct
 		lockdep_pin_lock(&rq->lock);
 	}
 #endif
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2932,7 +2933,7 @@ EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
  */
 unsigned long long task_sched_runtime(struct task_struct *p)
 {
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 	u64 ns;
 
@@ -2952,7 +2953,7 @@ unsigned long long task_sched_runtime(st
 		return p->se.sum_exec_runtime;
#endif
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	/*
 	 * Must be ->curr _and_ ->on_rq. If dequeued, we would
 	 * project cycles that may never be accounted to this
@@ -2963,7 +2964,7 @@ unsigned long long task_sched_runtime(st
 		p->sched_class->update_curr(rq);
 	}
 	ns = p->se.sum_exec_runtime;
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	return ns;
 }
@@ -3521,12 +3522,13 @@ EXPORT_SYMBOL(default_wake_function);
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
 	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
-	struct rq *rq;
 	const struct sched_class *prev_class;
+	struct rq_flags rf;
+	struct rq *rq;
 
 	BUG_ON(prio > MAX_PRIO);
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 
 	/*
 	 * Idle task boosting is a nono in general. There is one
@@ -3602,7 +3604,7 @@ void rt_mutex_setprio(struct task_struct
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
 	preempt_disable(); /* avoid rq from going away on us */
-	__task_rq_unlock(rq);
+	__task_rq_unlock(rq, &rf);
 
 	balance_callback(rq);
 	preempt_enable();
@@ -3612,7 +3614,7 @@ void rt_mutex_setprio(struct task_struct
 void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta, queued;
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
 	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -3621,7 +3623,7 @@ void set_user_nice(struct task_struct *p
 	 * We have to be careful, if called from sys_setpriority(),
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
@@ -3652,7 +3654,7 @@ void set_user_nice(struct task_struct *p
 			resched_curr(rq);
 	}
 out_unlock:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -3949,11 +3951,11 @@ static int __sched_setscheduler(struct t
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
 	int retval, oldprio, oldpolicy = -1, queued, running;
 	int new_effective_prio, policy = attr->sched_policy;
-	unsigned long flags;
 	const struct sched_class *prev_class;
-	struct rq *rq;
+	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+	struct rq *rq;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -4048,13 +4050,13 @@ static int __sched_setscheduler(struct t
 	 * To be able to change p->policy safely, the appropriate
 	 * runqueue lock must be held.
 	 */
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return -EINVAL;
 	}
 
@@ -4071,7 +4073,7 @@ static int __sched_setscheduler(struct t
 			goto change;
 
 		p->sched_reset_on_fork = reset_on_fork;
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return 0;
 	}
 change:
@@ -4085,7 +4087,7 @@ static int __sched_setscheduler(struct t
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
 				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
 				!task_group_is_autogroup(task_group(p))) {
-			task_rq_unlock(rq, p, &flags);
+			task_rq_unlock(rq, p, &rf);
 			return -EPERM;
 		}
 #endif
@@ -4100,7 +4102,7 @@ static int __sched_setscheduler(struct t
 		 */
 		if (!cpumask_subset(span, &p->cpus_allowed) ||
 		    rq->rd->dl_bw.bw == 0) {
-			task_rq_unlock(rq, p, &flags);
+			task_rq_unlock(rq, p, &rf);
 			return -EPERM;
 		}
 	}
@@ -4110,7 +4112,7 @@ static int __sched_setscheduler(struct t
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		goto recheck;
 	}
 
@@ -4120,7 +4122,7 @@ static int __sched_setscheduler(struct t
 	 * is available.
 	 */
 	if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return -EBUSY;
 	}
 
@@ -4165,7 +4167,7 @@ static int __sched_setscheduler(struct t
 	check_class_changed(rq, p, prev_class, oldprio);
 
 	preempt_disable(); /* avoid rq from going away on us */
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	if (pi)
 		rt_mutex_adjust_pi(p);
@@ -5018,10 +5020,10 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
 {
 	struct task_struct *p;
 	unsigned int time_slice;
-	unsigned long flags;
+	struct rq_flags rf;
+	struct timespec t;
 	struct rq *rq;
 	int retval;
-	struct timespec t;
 
 	if (pid < 0)
 		return -EINVAL;
@@ -5036,11 +5038,11 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
 	if (retval)
 		goto out_unlock;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	time_slice = 0;
 	if (p->sched_class->get_rr_interval)
 		time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
@@ -5304,11 +5306,11 @@ int migrate_task_to(struct task_struct *
  */
 void sched_setnuma(struct task_struct *p, int nid)
 {
-	struct rq *rq;
-	unsigned long flags;
 	bool queued, running;
+	struct rq_flags rf;
+	struct rq *rq;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
@@ -5323,7 +5325,7 @@ void sched_setnuma(struct task_struct *p
 		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
@@ -7754,10 +7756,10 @@ void sched_move_task(struct task_struct
 {
 	struct task_group *tg;
 	int queued, running;
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(tsk, &flags);
+	rq = task_rq_lock(tsk, &rf);
 
 	running = task_current(rq, tsk);
 	queued = task_on_rq_queued(tsk);
@@ -7789,7 +7791,7 @@ void sched_move_task(struct task_struct
 	if (queued)
 		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
 
-	task_rq_unlock(rq, tsk, &flags);
+	task_rq_unlock(rq, tsk, &rf);
 }
 #endif /* CONFIG_CGROUP_SCHED */
 
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -591,10 +591,10 @@ static enum hrtimer_restart dl_task_time
 						     struct sched_dl_entity,
 						     dl_timer);
 	struct task_struct *p = dl_task_of(dl_se);
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * The task might have changed its scheduling policy to something
@@ -677,7 +677,7 @@ static enum hrtimer_restart dl_task_time
 #endif
 
 unlock:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	/*
 	 * This can free the task_struct, including this hrtimer, do not touch
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1451,13 +1451,17 @@ static inline void sched_rt_avg_update(s
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-struct rq *__task_rq_lock(struct task_struct *p)
+struct rq_flags {
+	unsigned long flags;
+};
+
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(rq->lock);
-struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(p->pi_lock)
 	__acquires(rq->lock);
 
-static inline void __task_rq_unlock(struct rq *rq)
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
 	lockdep_unpin_lock(&rq->lock);
@@ -1465,13 +1469,13 @@ static inline void __task_rq_unlock(stru
 }
 
 static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 	__releases(rq->lock)
 	__releases(p->pi_lock)
 {
 	lockdep_unpin_lock(&rq->lock);
 	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
 #ifdef CONFIG_SMP
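
Editorial note, not part of the patch: the calling convention this patch introduces
replaces a bare "unsigned long flags" on the caller's stack with a struct rq_flags,
which task_rq_lock() fills (IRQ state goes into rf.flags) and task_rq_unlock()
consumes when restoring. A minimal sketch of a converted caller follows; the function
name and body are hypothetical, only the lock/unlock API and struct rq_flags come
from the patch above.

	/* Hypothetical caller, shown only to illustrate the new rq_flags usage. */
	static void example_adjust_task(struct task_struct *p)
	{
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(p, &rf);	/* takes p->pi_lock + rq->lock, saves IRQ flags in rf.flags */
		/* ... inspect or modify p's scheduling state while both locks are held ... */
		task_rq_unlock(rq, p, &rf);	/* drops both locks, restores IRQ flags from rf.flags */
	}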