Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.

Signed-off-by: Peter Zijlstra
---
 include/linux/sched.h |    8 +++
 kernel/sched.c        |  116 ++++++++++++++++++++++++++++++++++----------------
 kernel/sched_rt.c     |   42 +++++++-----------
 kernel/sysctl.c       |    4 -
 4 files changed, 106 insertions(+), 64 deletions(-)
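As a rough illustration of the granularity point above (not part of the patch): the old default of 62259 is simply 0.95 expressed in 16-bit fixed point, while the new interface states the same budget directly in microseconds. A small user-space sketch of the arithmetic, using only values that appear in this patch:

/*
 * Illustrative only -- not part of the patch.  Shows how the old
 * fixed-point rt_ratio default maps onto the new runtime/period pair.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* old interface: budget as a fraction of the period, in 1/65536 units */
	unsigned int rt_ratio  = 62259;		/* old default */
	uint64_t     period_us = 1000000;	/* sched_rt_period default, 1s */

	uint64_t old_budget_us = (period_us * rt_ratio) >> 16;

	/* new interface: budget stated directly in microseconds */
	uint64_t rt_runtime_us = 950000;	/* new default, 0.95s */

	printf("old: %u/65536 of %llu us = %llu us\n",
	       rt_ratio, (unsigned long long)period_us,
	       (unsigned long long)old_budget_us);
	printf("new: %llu us out of %llu us\n",
	       (unsigned long long)rt_runtime_us,
	       (unsigned long long)period_us);
	return 0;
}

Both describe roughly 950ms of rt runtime per 1s period; the fixed-point form only gets there up to rounding, which is exactly the granularity problem the changelog mentions.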
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1518,7 +1518,7 @@ extern unsigned int sysctl_sched_feature
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_rt_period;
-extern unsigned int sysctl_sched_rt_ratio;
+extern unsigned int sysctl_sched_rt_runtime;
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 extern unsigned int sysctl_sched_min_bal_int_shares;
 extern unsigned int sysctl_sched_max_bal_int_shares;
@@ -2014,6 +2014,12 @@ extern void sched_destroy_group(struct t
 extern void sched_move_task(struct task_struct *tsk);
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 extern unsigned long sched_group_shares(struct task_group *tg);
+extern int sched_group_set_rt_runtime(struct task_group *tg,
+				      unsigned long rt_runtime_us);
+extern unsigned long sched_group_rt_runtime(struct task_group *tg);
+extern int sched_group_set_rt_period(struct task_group *tg,
+				     unsigned long rt_runtime_us);
+extern unsigned long sched_group_rt_period(struct task_group *tg);
 #endif

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -176,7 +176,7 @@ struct task_group {
 	struct sched_rt_entity **rt_se;
 	struct rt_rq **rt_rq;
 
-	unsigned int rt_ratio;
+	u64 rt_runtime;
 	ktime_t rt_period;
 
 	/*
@@ -646,19 +646,16 @@ const_debug unsigned int sysctl_sched_fe
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * period over which we measure -rt task cpu usage in us.
+ * period over which we measure rt task cpu usage in us.
  * default: 1s
  */
 const_debug unsigned int sysctl_sched_rt_period = 1000000;
 
-#define SCHED_RT_FRAC_SHIFT	16
-#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
-
 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+const_debug unsigned int sysctl_sched_rt_runtime = 950000;
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7209,7 +7206,8 @@ void __init sched_init(void)
 				&per_cpu(init_sched_entity, i), i, 1);
 
 		rq->rt.rt_rq_type = RT_RQ_EDF;
-		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+		init_task_group.rt_runtime =
+			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		init_task_group.rt_period =
 			ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -7606,7 +7604,7 @@ struct task_group *sched_create_group(vo
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
-	tg->rt_ratio = 0; /* XXX */
+	tg->rt_runtime = 0; /* XXX */
 	tg->rt_period = ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
 
 	for_each_possible_cpu(i) {
@@ -7801,41 +7799,87 @@ unsigned long sched_group_shares(struct
 }
 
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+	u64 r = runtime * (1ULL << 16);
+	do_div(r, period);
+	return r;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
 	struct task_group *tgi;
 	unsigned long total = 0;
+	unsigned long global_ratio =
+		to_ratio(sysctl_sched_rt_period, sysctl_sched_rt_runtime);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list)
-		total += tgi->rt_ratio;
+	list_for_each_entry_rcu(tgi, &task_groups, list) {
+		if (tgi == tg)
+			continue;
+
+		total += to_ratio(ktime_to_ns(tgi->rt_period), tgi->rt_runtime);
+	}
 	rcu_read_unlock();
 
-	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-		return -EINVAL;
+	return total + to_ratio(period, runtime) < global_ratio;
+}
 
-	tg->rt_ratio = rt_ratio;
-	return 0;
+int sched_group_set_rt_runtime(struct task_group *tg,
+			       unsigned long rt_runtime_us)
+{
+	u64 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	int err = 0;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, ktime_to_ns(tg->rt_period), rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	tg->rt_runtime = rt_runtime;
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+unsigned long sched_group_rt_runtime(struct task_group *tg)
 {
-	return tg->rt_ratio;
+	u64 rt_runtime_us = tg->rt_runtime;
+
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
 }
 
-int sched_group_set_rt_period(struct task_group *tg, unsigned long rt_period)
+int sched_group_set_rt_period(struct task_group *tg, unsigned long rt_period_us)
 {
-	tg->rt_period = ns_to_ktime((u64)rt_period * NSEC_PER_USEC);
-	return 0;
+	u64 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
+	int err = 0;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, rt_period, tg->rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	tg->rt_period = ns_to_ktime(rt_period);
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
 unsigned long sched_group_rt_period(struct task_group *tg)
 {
-	u64 ns = ktime_to_ns(tg->rt_period);
-	do_div(ns, NSEC_PER_USEC);
-	return ns;
+	u64 rt_period_us = ktime_to_ns(tg->rt_period);
+
+	do_div(rt_period_us, NSEC_PER_USEC);
+	return rt_period_us;
 }
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
@@ -7913,17 +7957,15 @@ static u64 cpu_shares_read_uint(struct c
 	return (u64) tg->shares;
 }
 
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_ratio_val)
+static int cpu_rt_runtime_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+		u64 rt_runtime_val)
 {
-	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+	return sched_group_set_rt_runtime(cgroup_tg(cgrp), rt_runtime_val);
 }
 
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_runtime_read_uint(struct cgroup *cgrp, struct cftype *cft)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
-
-	return (u64) tg->rt_ratio;
+	return sched_group_rt_runtime(cgroup_tg(cgrp));
 }
 
 static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
@@ -7934,7 +7976,7 @@ static int cpu_rt_period_write_uint(stru
 
 static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
 {
-	return (u64) sched_group_rt_period(cgroup_tg(cgrp));
+	return sched_group_rt_period(cgroup_tg(cgrp));
 }
 
 static struct cftype cpu_files[] = {
@@ -7944,9 +7986,9 @@ static struct cftype cpu_files[] = {
 		.write_uint = cpu_shares_write_uint,
 	},
 	{
-		.name = "rt_ratio",
-		.read_uint = cpu_rt_ratio_read_uint,
-		.write_uint = cpu_rt_ratio_write_uint,
+		.name = "rt_runtime_us",
+		.read_uint = cpu_rt_runtime_read_uint,
+		.write_uint = cpu_rt_runtime_write_uint,
 	},
 	{
 		.name = "rt_period_us",
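For reference, the admission test that __rt_schedulable() performs above can be sketched in user space as follows. Only the 16-bit fixed-point conversion mirrors to_ratio() from the patch; the per-group budgets below are made-up example values:

/*
 * Illustrative sketch of the __rt_schedulable() admission test;
 * the group budgets are hypothetical examples.
 */
#include <stdio.h>
#include <stdint.h>

/* same 16-bit fixed point as to_ratio() in the patch */
static unsigned long to_ratio(uint64_t period, uint64_t runtime)
{
	return (unsigned long)((runtime << 16) / period);
}

int main(void)
{
	/* global limit: sysctl_sched_rt_runtime / sysctl_sched_rt_period */
	unsigned long global_ratio = to_ratio(1000000, 950000);

	/* hypothetical existing group: 200ms of runtime every 1s */
	unsigned long other = to_ratio(1000000000ULL, 200000000ULL);

	/* proposed setting for the group being changed: 500ms every 1s */
	unsigned long proposed = to_ratio(1000000000ULL, 500000000ULL);

	if (other + proposed < global_ratio)
		printf("schedulable: %lu + %lu < %lu\n",
		       other, proposed, global_ratio);
	else
		printf("rejected: %lu + %lu >= %lu\n",
		       other, proposed, global_ratio);
	return 0;
}

Since to_ratio() is dimensionless, it does not matter that the global limit is given in microseconds and the group values in nanoseconds, which is also why the kernel code can mix the two.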
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
 	if (!rt_rq->tg)
-		return SCHED_RT_FRAC;
+		return 0;
 
-	return rt_rq->tg->rt_ratio;
+	return rt_rq->tg->rt_runtime;
 }
 
 static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
@@ -100,7 +100,7 @@ static inline struct rt_rq *group_rt_rq(
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -113,7 +113,7 @@ static void sched_rt_ratio_enqueue(struc
 	}
 }
 
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -218,9 +218,9 @@ static struct sched_rt_entity *next_rt_d
 
 #else
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return sysctl_sched_rt_ratio;
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
 static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
@@ -257,11 +257,11 @@ static inline struct rt_rq *group_rt_rq(
 	return NULL;
 }
 
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 }
 
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 }
 
@@ -300,25 +300,21 @@ static inline int rt_se_prio(struct sche
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
-	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
-	u64 period, ratio;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (rt_ratio == SCHED_RT_FRAC)
+	if (!runtime)
 		goto out;
 
 	if (rt_rq->rt_throttled)
 		goto out;
 
-	period = sched_rt_period_ns(rt_rq);
-	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-	if (rt_rq->rt_time > ratio) {
+	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
 		if (rt_rq_throttled(rt_rq)) {
 			WARN_ON(!hrtimer_active(&rt_rq->rt_period_timer));
-			sched_rt_ratio_dequeue(rt_rq);
+			sched_rt_rq_dequeue(rt_rq);
 		}
 	}
 
@@ -328,14 +324,12 @@ out:
 
 static void update_sched_rt_period(struct rt_rq *rt_rq)
 {
-	u64 period = sched_rt_period_ns(rt_rq);
-	unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-	u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+	rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
 	if (rt_rq->rt_throttled) {
 		rt_rq->rt_throttled = 0;
-		sched_rt_ratio_enqueue(rt_rq);
+		sched_rt_rq_enqueue(rt_rq);
 	}
 }
 
@@ -412,7 +406,7 @@ static void update_curr_rt(struct rq *rq
 	cpuacct_charge(curr, delta_exec);
 
 	rt_rq->rt_time += delta_exec;
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
 }
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -319,8 +319,8 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_ratio",
-		.data		= &sysctl_sched_rt_ratio,
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
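A hedged usage sketch of the renamed knobs, not part of the patch. The sysctl name (sched_rt_runtime_us under /proc/sys/kernel/) and the cftype names (rt_runtime_us, rt_period_us) come from this patch; the cgroup mount point /cgroup/mygroup, the "cpu." file prefix, and the write_val() helper are assumptions about the local setup:

/*
 * Illustrative usage sketch -- run as root on a kernel with this
 * patch applied and the cpu cgroup controller mounted at /cgroup.
 */
#include <stdio.h>

static int write_val(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	/* global limit: at most 950ms of rt runtime per period */
	write_val("/proc/sys/kernel/sched_rt_runtime_us", "950000");

	/* per-group budget: 100ms of rt runtime every 1s for this group */
	write_val("/cgroup/mygroup/cpu.rt_period_us", "1000000");
	write_val("/cgroup/mygroup/cpu.rt_runtime_us", "100000");
	return 0;
}

A write that would push the combined group budgets past the global runtime/period ratio is rejected with -EINVAL by __rt_schedulable().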