Message-ID: <AANLkTinTQ5vPk_a91YRMd2qxkScQFQfRiSWhKK07rmkN@mail.gmail.com>
Date: Thu, 24 Feb 2011 19:11:29 -0800
From: Paul Turner <pjt@...gle.com>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: linux-kernel@...r.kernel.org,
Bharata B Rao <bharata@...ux.vnet.ibm.com>,
Dhaval Giani <dhaval@...ux.vnet.ibm.com>,
Balbir Singh <balbir@...ux.vnet.ibm.com>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
Gautham R Shenoy <ego@...ibm.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Kamalesh Babulal <kamalesh@...ux.vnet.ibm.com>,
Ingo Molnar <mingo@...e.hu>,
Pavel Emelyanov <xemul@...nvz.org>,
Herbert Poetzl <herbert@...hfloor.at>,
Avi Kivity <avi@...hat.com>,
Chris Friesen <cfriesen@...tel.com>,
Nikhil Rao <ncrao@...gle.com>
Subject: Re: [CFS Bandwidth Control v4 1/7] sched: introduce primitives to
account for CFS bandwidth tracking
On Wed, Feb 23, 2011 at 5:32 AM, Peter Zijlstra <a.p.zijlstra@...llo.nl> wrote:
> On Tue, 2011-02-15 at 19:18 -0800, Paul Turner wrote:
>
>> @@ -245,6 +248,15 @@ struct cfs_rq;
>>
>> static LIST_HEAD(task_groups);
>>
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +struct cfs_bandwidth {
>> +        raw_spinlock_t lock;
>> +        ktime_t period;
>> +        u64 runtime, quota;
>> +        struct hrtimer period_timer;
>> +};
>> +#endif
>
> If you write that as:
>
> struct cfs_bandwidth {
> #ifdef CONFIG_CFS_BANDWIDTH
> ...
> #endif
> };
>
While I (entirely subjectively) prefer making the #ifdefs in cfs_rq
explicit, I have no real objection, and this lets us kill the #ifdefs
around init_cfs_bandwidth() (since it does reference the member).
Done.
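
For reference, the restructured definition then looks roughly like
this (a sketch of the shape, not the exact follow-up code):

struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
        raw_spinlock_t lock;
        ktime_t period;
        u64 runtime, quota;
        struct hrtimer period_timer;
#endif
};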
>> /* task group related information */
>> struct task_group {
>> struct cgroup_subsys_state css;
>> @@ -276,6 +288,10 @@ struct task_group {
>> #ifdef CONFIG_SCHED_AUTOGROUP
>> struct autogroup *autogroup;
>> #endif
>> +
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        struct cfs_bandwidth cfs_bandwidth;
>> +#endif
>> };
>
> You can avoid the #ifdef'ery here
>
Done
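
With cfs_bandwidth defined as above, the member can then be embedded
unconditionally; it is simply empty when !CONFIG_CFS_BANDWIDTH.
Sketch:

struct task_group {
        struct cgroup_subsys_state css;
        /* ... */
        struct cfs_bandwidth cfs_bandwidth;
};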
>> /* task_group_lock serializes the addition/removal of task groups */
>> @@ -370,9 +386,76 @@ struct cfs_rq {
>
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
>> +
>> +static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
>> +{
>> +        struct cfs_bandwidth *cfs_b =
>> +                container_of(timer, struct cfs_bandwidth, period_timer);
>> +        ktime_t now;
>> +        int overrun;
>> +        int idle = 0;
>> +
>> +        for (;;) {
>> +                now = hrtimer_cb_get_time(timer);
>> +                overrun = hrtimer_forward(timer, now, cfs_b->period);
>> +
>> +                if (!overrun)
>> +                        break;
>> +
>> +                idle = do_sched_cfs_period_timer(cfs_b, overrun);
>> +        }
>> +
>> +        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
>> +}
>> +
>> +static
>> +void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, u64 quota, u64 period)
>> +{
>> +        raw_spin_lock_init(&cfs_b->lock);
>> +        cfs_b->quota = cfs_b->runtime = quota;
>> +        cfs_b->period = ns_to_ktime(period);
>> +
>> +        hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +        cfs_b->period_timer.function = sched_cfs_period_timer;
>> +}
>> +
>> +static
>> +void init_cfs_rq_quota(struct cfs_rq *cfs_rq)
>> +{
>> +        cfs_rq->quota_used = 0;
>> +        if (cfs_rq->tg->cfs_bandwidth.quota == RUNTIME_INF)
>> +                cfs_rq->quota_assigned = RUNTIME_INF;
>> +        else
>> +                cfs_rq->quota_assigned = 0;
>> +}
>> +
>> +static void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
>> +{
>> +        if (cfs_b->quota == RUNTIME_INF)
>> +                return;
>> +
>> +        if (hrtimer_active(&cfs_b->period_timer))
>> +                return;
>> +
>> +        raw_spin_lock(&cfs_b->lock);
>> +        start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
>> +        raw_spin_unlock(&cfs_b->lock);
>> +}
>> +
>> +static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
>> +{
>> +        hrtimer_cancel(&cfs_b->period_timer);
>> +}
>> +#endif
>
> and #else
>
> stubs
> #endif
>
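The stubs for the !CONFIG_CFS_BANDWIDTH side would look something
like this (a sketch using the signatures from the patch above):

#else /* !CONFIG_CFS_BANDWIDTH */
static void
init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, u64 quota, u64 period) {}
static void init_cfs_rq_quota(struct cfs_rq *cfs_rq) {}
static void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
#endif
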
>> /* Real-Time classes' related field in a runqueue: */
>> struct rt_rq {
>> struct rt_prio_array active;
>> @@ -8038,6 +8121,9 @@ static void init_tg_cfs_entry(struct tas
>> tg->cfs_rq[cpu] = cfs_rq;
>> init_cfs_rq(cfs_rq, rq);
>> cfs_rq->tg = tg;
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        init_cfs_rq_quota(cfs_rq);
>> +#endif
>
> also avoids #ifdef'ery here
>
Done
>> tg->se[cpu] = se;
>> /* se could be NULL for root_task_group */
>> @@ -8173,6 +8259,10 @@ void __init sched_init(void)
>> * We achieve this by letting root_task_group's tasks sit
>> * directly in rq->cfs (i.e root_task_group->se[] = NULL).
>> */
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        init_cfs_bandwidth(&root_task_group.cfs_bandwidth,
>> +                        RUNTIME_INF, sched_cfs_bandwidth_period);
>> +#endif
>
> and here
>
Done
>> init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
>> #endif /* CONFIG_FAIR_GROUP_SCHED */
>>
>> @@ -8415,6 +8505,10 @@ static void free_fair_sched_group(struct
>> {
>> int i;
>>
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        destroy_cfs_bandwidth(&tg->cfs_bandwidth);
>> +#endif
>
> and here
>
Done
>> for_each_possible_cpu(i) {
>> if (tg->cfs_rq)
>> kfree(tg->cfs_rq[i]);
>> @@ -8442,7 +8536,10 @@ int alloc_fair_sched_group(struct task_g
>> goto err;
>>
>> tg->shares = NICE_0_LOAD;
>> -
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        init_cfs_bandwidth(&tg->cfs_bandwidth, RUNTIME_INF,
>> +                        sched_cfs_bandwidth_period);
>> +#endif
>
> and here
>
Done
>> for_each_possible_cpu(i) {
>> rq = cpu_rq(i);
>>
>
>> @@ -9107,6 +9204,116 @@ static u64 cpu_shares_read_u64(struct cg
>>
>> return (u64) tg->shares;
>> }
>> +
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
>> +{
>> +        int i;
>> +        static DEFINE_MUTEX(mutex);
>> +
>> +        if (tg == &root_task_group)
>> +                return -EINVAL;
>> +
>> +        if (!period)
>> +                return -EINVAL;
>> +
>> +        /*
>> +         * Ensure we have at least one tick of bandwidth every period. This is
>> +         * to prevent reaching a state of large arrears when throttled via
>> +         * entity_tick() resulting in prolonged exit starvation.
>> +         */
>> +        if (NS_TO_JIFFIES(quota) < 1)
>> +                return -EINVAL;
>> +
>> +        mutex_lock(&mutex);
>> +        raw_spin_lock_irq(&tg->cfs_bandwidth.lock);
>> +        tg->cfs_bandwidth.period = ns_to_ktime(period);
>> +        tg->cfs_bandwidth.runtime = tg->cfs_bandwidth.quota = quota;
>> +        raw_spin_unlock_irq(&tg->cfs_bandwidth.lock);
>> +
>> +        for_each_possible_cpu(i) {
>> +                struct cfs_rq *cfs_rq = tg->cfs_rq[i];
>> +                struct rq *rq = rq_of(cfs_rq);
>> +
>> +                raw_spin_lock_irq(&rq->lock);
>> +                init_cfs_rq_quota(cfs_rq);
>> +                raw_spin_unlock_irq(&rq->lock);
>
> Any particular reason you didn't mirror rt_rq->rt_runtime_lock?
>
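(For reference, the rt_rq analogue Peter is pointing at keeps a
dedicated lock next to the runtime it protects, nesting inside
rq->lock, roughly:

struct rt_rq {
        /* ... */
        u64 rt_time;
        u64 rt_runtime;
        /* Nests inside the rq lock: */
        raw_spinlock_t rt_runtime_lock;
};

The patch above instead performs the per-cpu update under rq->lock
directly.)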
>> +        }
>> +        mutex_unlock(&mutex);
>> +
>> +        return 0;
>> +}
>
>
>> Index: tip/kernel/sched_fair.c
>> ===================================================================
>> --- tip.orig/kernel/sched_fair.c
>> +++ tip/kernel/sched_fair.c
>> @@ -88,6 +88,15 @@ const_debug unsigned int sysctl_sched_mi
>> */
>> unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
>>
>> +
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +/*
>> + * default period for cfs group bandwidth.
>> + * default: 0.5s, units: nanoseconds
>> + */
>> +static u64 sched_cfs_bandwidth_period = 500000000ULL;
>> +#endif
>> +
>> static const struct sched_class fair_sched_class;
>>
>> /**************************************************************
>> @@ -397,6 +406,9 @@ static void __enqueue_entity(struct cfs_
>>
>> rb_link_node(&se->run_node, parent, link);
>> rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
>> +#ifdef CONFIG_CFS_BANDWIDTH
>> +        start_cfs_bandwidth(&cfs_rq->tg->cfs_bandwidth);
>> +#endif
>> }
>
> This really needs to life elsewhere, __*_entity() functions are for
> rb-tree muck.
>
Moved to enqueue_entity().
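That is, roughly (a sketch of the destination; the surrounding
enqueue_entity() details are elided):

static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
        /* ... existing enqueue work ... */
        if (se != cfs_rq->curr)
                __enqueue_entity(cfs_rq, se);

        start_cfs_bandwidth(&cfs_rq->tg->cfs_bandwidth);
}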
>> static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/