Expand sched_{set,get}attr() to include the policy and nice value.
This obviates the need for sched_setscheduler2().

The new sched_setattr() call now covers the functionality of:

  sched_setscheduler(), sched_setparam(), setpriority(.which = PRIO_PROCESS)

And sched_getattr() now covers:

  sched_getscheduler(), sched_getparam(), getpriority(.which = PRIO_PROCESS)

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 arch/arm/include/asm/unistd.h      |    2 
 arch/arm/include/uapi/asm/unistd.h |    5 -
 arch/arm/kernel/calls.S            |    3 
 arch/x86/syscalls/syscall_32.tbl   |    1 
 arch/x86/syscalls/syscall_64.tbl   |    1 
 include/linux/sched.h              |   24 +++--
 include/linux/syscalls.h           |    2 
 kernel/sched/core.c                |  173 +++++++++++++++++++------------------
 kernel/sched/sched.h               |   13 +-
 9 files changed, 119 insertions(+), 105 deletions(-)

--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (382)
 #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,9 +406,8 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
-#define __NR_sched_setscheduler2	(__NR_SYSCALL_BASE+380)
-#define __NR_sched_setattr		(__NR_SYSCALL_BASE+381)
-#define __NR_sched_getattr		(__NR_SYSCALL_BASE+382)
+#define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,8 +389,7 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
-/* 380 */	CALL(sys_sched_setscheduler2)
-		CALL(sys_sched_setattr)
+/* 380 */	CALL(sys_sched_setattr)
 		CALL(sys_sched_getattr)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -359,4 +359,3 @@
 350	i386	finit_module		sys_finit_module
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
-353	i386	sched_setscheduler2	sys_sched_setscheduler2
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -322,7 +322,6 @@
 313	common	finit_module		sys_finit_module
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
-316	common	sched_setscheduler2	sys_sched_setscheduler2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -57,7 +57,7 @@ struct sched_param {
 
 #include <asm/param.h>	/* for HZ */
 
-#define SCHED_ATTR_SIZE_VER0	40	/* sizeof first published struct */
+#define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */
 
 /*
  * Extended scheduling parameters data structure.
@@ -85,7 +85,9 @@ struct sched_param {
  *
  * This is reflected by the actual fields of the sched_attr structure:
 *
- *  @sched_priority	task's priority (might still be useful)
+ *  @sched_policy	task's scheduling policy
+ *  @sched_nice		task's nice value      (SCHED_NORMAL/BATCH)
+ *  @sched_priority	task's static priority (SCHED_FIFO/RR)
 *  @sched_flags	for customizing the scheduler behaviour
 *  @sched_deadline	representative of the task's deadline
 *  @sched_runtime	representative of the task's runtime
@@ -102,15 +104,21 @@ struct sched_param {
 * available in the scheduling class file or in Documentation/.
 */
 struct sched_attr {
-	int sched_priority;
-	unsigned int sched_flags;
+	u32 size;
+
+	u32 sched_policy;
+	u64 sched_flags;
+
+	/* SCHED_NORMAL, SCHED_BATCH */
+	s32 sched_nice;
+
+	/* SCHED_FIFO, SCHED_RR */
+	u32 sched_priority;
+
+	/* SCHED_DEADLINE */
 	u64 sched_runtime;
 	u64 sched_deadline;
 	u64 sched_period;
-	u32 size;
-
-	/* Align to u64. */
-	u32 __reserved;
 };
 
 struct exec_domain;
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -278,8 +278,6 @@ asmlinkage long sys_clock_nanosleep(cloc
 asmlinkage long sys_nice(int increment);
 asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
					struct sched_param __user *param);
-asmlinkage long sys_sched_setscheduler2(pid_t pid, int policy,
-					struct sched_attr __user *attr);
 asmlinkage long sys_sched_setparam(pid_t pid,
					struct sched_param __user *param);
 asmlinkage long sys_sched_setattr(pid_t pid,
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2973,6 +2973,7 @@ void rt_mutex_setprio(struct task_struct
 	__task_rq_unlock(rq);
 }
 #endif
+
 void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta, on_rq;
@@ -3147,24 +3148,6 @@ static struct task_struct *find_process_
 	return pid ? find_task_by_vpid(pid) : current;
 }
 
-/* Actually do priority change: must hold rq lock. */
-static void
-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
-{
-	p->policy = policy;
-	p->rt_priority = prio;
-	p->normal_prio = normal_prio(p);
-	/* we are holding p->pi_lock already */
-	p->prio = rt_mutex_getprio(p);
-	if (dl_prio(p->prio))
-		p->sched_class = &dl_sched_class;
-	else if (rt_prio(p->prio))
-		p->sched_class = &rt_sched_class;
-	else
-		p->sched_class = &fair_sched_class;
-	set_load_weight(p);
-}
-
 /*
  * This function initializes the sched_dl_entity of a newly becoming
  * SCHED_DEADLINE task.
@@ -3188,6 +3171,34 @@ __setparam_dl(struct task_struct *p, con
 	dl_se->dl_new = 1;
 }
 
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+			   const struct sched_attr *attr)
+{
+	int policy = attr->sched_policy;
+
+	p->policy = policy;
+
+	if (fair_policy(policy))
+		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+	if (rt_policy(policy))
+		p->rt_priority = attr->sched_priority;
+	if (dl_policy(policy))
+		__setparam_dl(p, attr);
+
+	p->normal_prio = normal_prio(p);
+	p->prio = rt_mutex_getprio(p);
+
+	if (dl_prio(p->prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
+	set_load_weight(p);
+}
+
 static void
 __getparam_dl(struct task_struct *p, struct sched_attr *attr)
 {
@@ -3234,11 +3245,12 @@ static bool check_same_owner(struct task
 	return match;
 }
 
-static int __sched_setscheduler(struct task_struct *p, int policy,
+static int __sched_setscheduler(struct task_struct *p,
				const struct sched_attr *attr,
				bool user)
 {
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
+	int policy = attr->sched_policy;
 	unsigned long flags;
 	const struct sched_class *prev_class;
 	struct rq *rq;
@@ -3271,6 +3283,7 @@ static int __sched_setscheduler(struct t
 	    (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
+
 	if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
 	    (rt_policy(policy) != (attr->sched_priority != 0)))
 		return -EINVAL;
@@ -3279,6 +3292,11 @@ static int __sched_setscheduler(struct t
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (user && !capable(CAP_SYS_NICE)) {
+		if (fair_policy(policy)) {
+			if (!can_nice(p, attr->sched_nice))
+				return -EPERM;
+		}
+
 		if (rt_policy(policy)) {
 			unsigned long rlim_rtprio =
					task_rlimit(p, RLIMIT_RTPRIO);
@@ -3337,12 +3355,18 @@ static int __sched_setscheduler(struct t
 	/*
 	 * If not changing anything there's no need to proceed further:
 	 */
-	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
-			attr->sched_priority == p->rt_priority) &&
-			!dl_policy(policy))) {
+	if (unlikely(policy == p->policy)) {
+		if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+			goto change;
+		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
+			goto change;
+		if (dl_policy(policy))
+			goto change;
+
 		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
+change:
 
 	if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -3399,8 +3423,7 @@ static int __sched_setscheduler(struct t
 	 */
 	if ((dl_policy(policy) || dl_task(p)) &&
 	    dl_overflow(p, policy, attr)) {
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return -EBUSY;
 	}
 
@@ -3415,9 +3438,7 @@ static int __sched_setscheduler(struct t
 
 	oldprio = p->prio;
 	prev_class = p->sched_class;
-	if (dl_policy(policy))
-		__setparam_dl(p, attr);
-	__setscheduler(rq, p, policy, attr->sched_priority);
+	__setscheduler(rq, p, attr);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -3446,18 +3467,18 @@ int sched_setscheduler(struct task_struc
		       const struct sched_param *param)
 {
 	struct sched_attr attr = {
+		.sched_policy   = policy,
 		.sched_priority = param->sched_priority
 	};
-	return __sched_setscheduler(p, policy, &attr, true);
+	return __sched_setscheduler(p, &attr, true);
 }
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
-int sched_setscheduler2(struct task_struct *p, int policy,
-			const struct sched_attr *attr)
+int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-	return __sched_setscheduler(p, policy, attr, true);
+	return __sched_setscheduler(p, attr, true);
 }
-EXPORT_SYMBOL_GPL(sched_setscheduler2);
+EXPORT_SYMBOL_GPL(sched_setattr);
 
 /**
 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
@@ -3476,9 +3497,10 @@ int sched_setscheduler_nocheck(struct ta
		       const struct sched_param *param)
 {
 	struct sched_attr attr = {
+		.sched_policy   = policy,
 		.sched_priority = param->sched_priority
 	};
-	return __sched_setscheduler(p, policy, &attr, false);
+	return __sched_setscheduler(p, &attr, false);
 }
 
 static int
@@ -3561,6 +3583,12 @@ static int sched_copy_attr(struct sched_
 	if (ret)
 		return -EFAULT;
 
+	/*
+	 * XXX: do we want to be lenient like existing syscalls; or do we want
+	 * to be strict and return an error on out-of-bounds values?
+	 */
+	attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+
 out:
 	return ret;
 
@@ -3570,33 +3598,6 @@ static int sched_copy_attr(struct sched_
 	goto out;
 }
 
-static int
-do_sched_setscheduler2(pid_t pid, int policy,
-		struct sched_attr __user *attr_uptr)
-{
-	struct sched_attr attr;
-	struct task_struct *p;
-	int retval;
-
-	if (!attr_uptr || pid < 0)
-		return -EINVAL;
-
-	if (sched_copy_attr(attr_uptr, &attr))
-		return -EFAULT;
-
-	rcu_read_lock();
-	retval = -ESRCH;
-	p = find_process_by_pid(pid);
-	if (p != NULL) {
-		if (dl_policy(policy))
-			attr.sched_priority = 0;
-		retval = sched_setscheduler2(p, policy, &attr);
-	}
-	rcu_read_unlock();
-
-	return retval;
-}
-
 /**
 * sys_sched_setscheduler - set/change the scheduler policy and RT priority
 * @pid: the pid in question.
@@ -3616,21 +3617,6 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_
 }
 
 /**
- * sys_sched_setscheduler2 - same as above, but with extended sched_param
- * @pid: the pid in question.
- * @policy: new policy (could use extended sched_param).
- * @attr: structure containg the extended parameters.
- */
-SYSCALL_DEFINE3(sched_setscheduler2, pid_t, pid, int, policy,
-		struct sched_attr __user *, attr)
-{
-	if (policy < 0)
-		return -EINVAL;
-
-	return do_sched_setscheduler2(pid, policy, attr);
-}
-
-/**
 * sys_sched_setparam - set/change the RT priority of a thread
 * @pid: the pid in question.
 * @param: structure containing the new RT priority.
@@ -3647,10 +3633,26 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, p
 * @pid: the pid in question.
 * @attr: structure containing the extended parameters.
 */
-SYSCALL_DEFINE2(sched_setattr, pid_t, pid,
-		struct sched_attr __user *, attr)
+SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr)
 {
-	return do_sched_setscheduler2(pid, -1, attr);
+	struct sched_attr attr;
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0)
+		return -EINVAL;
+
+	if (sched_copy_attr(uattr, &attr))
+		return -EFAULT;
+
+	rcu_read_lock();
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (p != NULL)
+		retval = sched_setattr(p, &attr);
+	rcu_read_unlock();
+
+	return retval;
 }
 
 /**
@@ -3797,8 +3799,14 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pi
 	if (retval)
 		goto out_unlock;
 
-	__getparam_dl(p, &attr);
-	attr.sched_priority = p->rt_priority;
+	attr.sched_policy = p->policy;
+	if (task_has_dl_policy(p))
+		__getparam_dl(p, &attr);
+	else if (task_has_rt_policy(p))
+		attr.sched_priority = p->rt_priority;
+	else
+		attr.sched_nice = TASK_NICE(p);
+
 	rcu_read_unlock();
 
 	retval = sched_read_attr(uattr, &attr, size);
@@ -6948,13 +6956,16 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
 	const struct sched_class *prev_class = p->sched_class;
+	struct sched_attr attr = {
+		.sched_policy = SCHED_NORMAL,
+	};
 	int old_prio = p->prio;
 	int on_rq;
 
 	on_rq = p->on_rq;
 	if (on_rq)
 		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, SCHED_NORMAL, 0);
+	__setscheduler(rq, p, &attr);
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
 		resched_task(rq->curr);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -90,18 +90,19 @@ extern void update_cpu_load_active(struc
 */
 #define DL_SCALE (10)
 
+static inline int fair_policy(int policy)
+{
+	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
+}
+
 static inline int rt_policy(int policy)
 {
-	if (policy == SCHED_FIFO || policy == SCHED_RR)
-		return 1;
-	return 0;
+	return policy == SCHED_FIFO || policy == SCHED_RR;
 }
 
 static inline int dl_policy(int policy)
 {
-	if (unlikely(policy == SCHED_DEADLINE))
-		return 1;
-	return 0;
+	return unlikely(policy == SCHED_DEADLINE);
 }
 
 static inline int task_has_rt_policy(struct task_struct *p)
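
For anyone who wants to poke at the consolidated ABI from userspace, below is a
minimal test sketch. It is illustrative only, not part of the patch: glibc
provides no wrappers, so the calls go through syscall(2); the syscall numbers
assume the x86-64 table above (314/315); the argument counts match this version
of the patch (sched_setattr takes pid and attr, sched_getattr takes pid, attr
and size); and the structure below mirrors the kernel's sched_attr using
fixed-width userspace types. The file name and everything in it are
hypothetical.

/* sched_attr_test.c - hypothetical userspace sketch, not part of this patch */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#define __NR_sched_setattr	314	/* x86-64, per syscall_64.tbl above */
#define __NR_sched_getattr	315

/* userspace mirror of the kernel's struct sched_attr; 48 bytes total */
struct sched_attr {
	uint32_t size;			/* == SCHED_ATTR_SIZE_VER0 (48) */
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL, SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO, SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	/* what used to take setpriority(PRIO_PROCESS, 0, 5):
	 * policy and nice value in a single call */
	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = 0;		/* SCHED_NORMAL */
	attr.sched_nice = 5;
	if (syscall(__NR_sched_setattr, 0, &attr))
		perror("sched_setattr");

	/* policy, nice and rt_priority all come back from one call */
	memset(&attr, 0, sizeof(attr));
	if (syscall(__NR_sched_getattr, 0, &attr, sizeof(attr)))
		perror("sched_getattr");
	else
		printf("policy=%u nice=%d rt_priority=%u\n",
		       attr.sched_policy, attr.sched_nice,
		       attr.sched_priority);
	return 0;
}

The userspace-visible attr.size field (48 here, matching SCHED_ATTR_SIZE_VER0
above) is what lets the structure grow in later ABI versions without breaking
binaries compiled against the first published layout.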