Message-ID: <AANLkTi=QpUDvAO+MZcSU=gsAOXpi_rkQeBOnj=0p5OzV@mail.gmail.com>
Date: Thu, 24 Mar 2011 00:40:06 -0700
From: Paul Turner <pjt@...gle.com>
To: bharata@...ux.vnet.ibm.com
Cc: linux-kernel@...r.kernel.org,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Dhaval Giani <dhaval.giani@...il.com>,
Balbir Singh <balbir@...ux.vnet.ibm.com>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Kamalesh Babulal <kamalesh@...ux.vnet.ibm.com>,
Ingo Molnar <mingo@...e.hu>,
Pavel Emelyanov <xemul@...nvz.org>,
Nikhil Rao <ncrao@...gle.com>
Subject: Re: [patch 04/15] sched: throttle cfs_rq entities which exceed their
local quota
On Wed, Mar 23, 2011 at 11:36 PM, Bharata B Rao
<bharata@...ux.vnet.ibm.com> wrote:
> On Tue, Mar 22, 2011 at 08:03:30PM -0700, Paul Turner wrote:
>> In account_cfs_rq_quota() (via update_curr()) we track consumption versus a
>> cfs_rq's locally assigned quota and whether there is global quota available
>> to provide a refill when it runs out.
>>
>> In the case that there is no quota remaining it's necessary to throttle so
>> that execution ceases until the subsequent period. While it is at this
>> boundary that we detect (and signal for, via resched_task) that a throttle is
>> required, the actual operation is deferred until put_prev_entity().
>>
>> At this point the cfs_rq is marked as throttled and not re-enqueued; this
>> avoids potential interactions with throttled runqueues in the event that we
>> are not immediately able to evict the running task.
>>
>> Signed-off-by: Paul Turner <pjt@...gle.com>
>> Signed-off-by: Nikhil Rao <ncrao@...gle.com>
>> Signed-off-by: Bharata B Rao <bharata@...ux.vnet.ibm.com>
>> ---
>> kernel/sched.c | 2
>> kernel/sched_fair.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 113 insertions(+), 6 deletions(-)
>>
>> Index: tip/kernel/sched.c
>> ===================================================================
>> --- tip.orig/kernel/sched.c
>> +++ tip/kernel/sched.c
>> @@ -386,7 +386,7 @@ struct cfs_rq {
>> unsigned long load_contribution;
>> #endif
>> #ifdef CONFIG_CFS_BANDWIDTH
>> - int quota_enabled;
>> + int quota_enabled, throttled;
>> s64 quota_remaining;
>> #endif
>> #endif
>> Index: tip/kernel/sched_fair.c
>> ===================================================================
>> --- tip.orig/kernel/sched_fair.c
>> +++ tip/kernel/sched_fair.c
>> @@ -321,9 +321,6 @@ find_matching_se(struct sched_entity **s
>>
>> #endif /* CONFIG_FAIR_GROUP_SCHED */
>>
>> -static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
>> - unsigned long delta_exec);
>> -
>> /**************************************************************
>> * Scheduling class tree data structure manipulation methods:
>> */
>> @@ -588,6 +585,9 @@ __update_curr(struct cfs_rq *cfs_rq, str
>> #endif
>> }
>>
>> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
>> + unsigned long delta_exec);
>> +
>> static void update_curr(struct cfs_rq *cfs_rq)
>> {
>> struct sched_entity *curr = cfs_rq->curr;
>> @@ -1221,6 +1221,9 @@ static struct sched_entity *pick_next_en
>> return se;
>> }
>>
>> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq);
>> +static inline int within_bandwidth(struct cfs_rq *cfs_rq);
>> +
>> static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
>> {
>> /*
>> @@ -1230,6 +1233,9 @@ static void put_prev_entity(struct cfs_r
>> if (prev->on_rq)
>> update_curr(cfs_rq);
>>
>> + if (!within_bandwidth(cfs_rq))
>> + throttle_cfs_rq(cfs_rq);
>> +
>> check_spread(cfs_rq, prev);
>> if (prev->on_rq) {
>> update_stats_wait_start(cfs_rq, prev);
>> @@ -1241,6 +1247,8 @@ static void put_prev_entity(struct cfs_r
>> cfs_rq->curr = NULL;
>> }
>>
>> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq);
>> +
>> static void
>> entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>> {
>> @@ -1249,6 +1257,9 @@ entity_tick(struct cfs_rq *cfs_rq, struc
>> */
>> update_curr(cfs_rq);
>>
>> + /* check that entity's usage is still within quota (if enabled) */
>> + check_cfs_rq_quota(cfs_rq);
>> +
>> /*
>> * Update share accounting for long-running entities.
>> */
>> @@ -1294,6 +1305,46 @@ static inline u64 sched_cfs_bandwidth_sl
>> return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
>> }
>>
>> +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
>> +{
>> + return cfs_rq->throttled;
>> +}
>> +
>> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
>> +{
>> + struct task_group *tg;
>> + struct sched_entity *se;
>> +
>> + if (cfs_rq_throttled(cfs_rq))
>> + return 1;
>> +
>> + tg = cfs_rq->tg;
>> + se = tg->se[cpu_of(rq_of(cfs_rq))];
>> + if (!se)
>> + return 0;
>> +
>> + for_each_sched_entity(se) {
>> + if (cfs_rq_throttled(cfs_rq_of(se)))
>> + return 1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static inline int within_bandwidth(struct cfs_rq *cfs_rq)
>> +{
>> + return !cfs_rq->quota_enabled || cfs_rq->quota_remaining > 0;
>> +}
>> +
>> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq)
>> +{
>> + if (within_bandwidth(cfs_rq))
>> + return;
>> +
>> +
>> + resched_task(rq_of(cfs_rq)->curr);
>> +}
>> +
>> static void request_cfs_rq_quota(struct cfs_rq *cfs_rq)
>> {
>> struct task_group *tg = cfs_rq->tg;
>> @@ -1330,6 +1381,29 @@ static void account_cfs_rq_quota(struct
>> request_cfs_rq_quota(cfs_rq);
>> }
>>
>> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
>> +{
>> + struct sched_entity *se;
>> +
>> + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
>> +
>> + /* account load preceding throttle */
>> + update_cfs_load(cfs_rq, 0);
>> +
>> + for_each_sched_entity(se) {
>> + struct cfs_rq *qcfs_rq = cfs_rq_of(se);
>> + /* throttled entity or throttle-on-deactivate */
>> + if (!se->on_rq)
>> + break;
>> +
>> + dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
>> + if (qcfs_rq->load.weight)
>> + break;
>> + }
>> +
>> + cfs_rq->throttled = 1;
>> +}
>
> Since throttling is done from put_prev_entity(), iiuc, you will be
> doing a 'put' for current entities which are not on the tree. Can you
> avoid the dequeue_entity() call here, which I think will anyway bail out
> of the actual dequeueing (the se != cfs_rq->curr check in dequeue_entity)?
>
No -- cfs_rq->curr is still wholly enqueued apart from its residency in
the rb-tree; this includes factors such as the number of runnable entities
and its contribution to load.  The dequeue is necessary; a throttle is
analogous to the current task blocking, only at the group-entity level.
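
For reference, a rough sketch of dequeue_entity() (paraphrased from the
mainline code of this era, simplified and not the exact source): the
se != cfs_rq->curr check only skips the rb-tree removal, while the rest of
the bookkeeping -- clearing on_rq, load tracking, nr_running accounting --
still runs, which is exactly the work a throttle needs done:

/* simplified sketch, not the exact mainline dequeue_entity() */
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
	update_curr(cfs_rq);

	if (se != cfs_rq->curr)
		__dequeue_entity(cfs_rq, se);	/* rb-tree removal only */
	se->on_rq = 0;

	update_cfs_load(cfs_rq, 0);		/* load tracking */
	account_entity_dequeue(cfs_rq, se);	/* nr_running, load.weight */
	update_min_vruntime(cfs_rq);
}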
> Regards,
> Bharata.
>