[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20101013133053.GD3914@balbir.in.ibm.com>
Date: Wed, 13 Oct 2010 19:00:53 +0530
From: Balbir Singh <balbir@...ux.vnet.ibm.com>
To: Bharata B Rao <bharata@...ux.vnet.ibm.com>
Cc: linux-kernel@...r.kernel.org,
Dhaval Giani <dhaval.giani@...il.com>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Kamalesh Babulal <kamalesh@...ux.vnet.ibm.com>,
Ingo Molnar <mingo@...e.hu>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Pavel Emelyanov <xemul@...nvz.org>,
Herbert Poetzl <herbert@...hfloor.at>,
Avi Kivity <avi@...hat.com>,
Chris Friesen <cfriesen@...tel.com>,
Paul Menage <menage@...gle.com>,
Mike Waychison <mikew@...gle.com>,
Paul Turner <pjt@...gle.com>, Nikhil Rao <ncrao@...gle.com>
Subject: Re: [PATCH v3 2/7] sched: accumulate per-cfs_rq cpu usage
* Bharata B Rao <bharata@...ux.vnet.ibm.com> [2010-10-12 13:21:09]:
> sched: accumulate per-cfs_rq cpu usage
>
> From: Paul Turner <pjt@...gle.com>
>
> Introduce account_cfs_rq_quota() to account bandwidth usage on the cfs_rq
> level versus task_groups for which bandwidth has been assigned. This is
> tracked by whether the local cfs_rq->quota_assigned is finite or infinite
> (RUNTIME_INF).
>
> For cfs_rq's that belong to a bandwidth constrained task_group we introduce
> tg_request_cfs_quota() which attempts to allocate quota from the global pool
> for use locally. Updates involving the global pool are currently protected
> under cfs_bandwidth->lock, local pools are protected by rq->lock.
>
> This patch only attempts to assign and track quota, no action is taken in the
> case that cfs_rq->quota_used exceeds cfs_rq->quota_assigned.
>
> Signed-off-by: Paul Turner <pjt@...gle.com>
> Signed-off-by: Nikhil Rao <ncrao@...gle.com>
> Signed-off-by: Bharata B Rao <bharata@...ux.vnet.ibm.com>
> ---
> include/linux/sched.h | 4 ++++
> kernel/sched.c | 13 +++++++++++++
> kernel/sched_fair.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> kernel/sysctl.c | 10 ++++++++++
> 4 files changed, 77 insertions(+)
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1898,6 +1898,10 @@ int sched_rt_handler(struct ctl_table *t
> void __user *buffer, size_t *lenp,
> loff_t *ppos);
>
> +#ifdef CONFIG_CFS_BANDWIDTH
> +extern unsigned int sysctl_sched_cfs_bandwidth_slice;
> +#endif
> +
> extern unsigned int sysctl_sched_compat_yield;
>
> #ifdef CONFIG_RT_MUTEXES
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -1929,6 +1929,19 @@ static const struct sched_class rt_sched
> * default: 0.5s
> */
> static u64 sched_cfs_bandwidth_period = 500000000ULL;
> +
> +/*
> + * default slice of quota to allocate from global tg to local cfs_rq pool on
> + * each refresh
> + * default: 10ms
> + */
> +unsigned int sysctl_sched_cfs_bandwidth_slice = 10000UL;
> +
> +static inline u64 sched_cfs_bandwidth_slice(void)
> +{
> + return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
> +}
> +
> #endif
>
> #define sched_class_highest (&rt_sched_class)
> --- a/kernel/sched_fair.c
> +++ b/kernel/sched_fair.c
> @@ -267,6 +267,16 @@ find_matching_se(struct sched_entity **s
>
> #endif /* CONFIG_FAIR_GROUP_SCHED */
>
> +#ifdef CONFIG_CFS_BANDWIDTH
> +static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
> +{
> + return &tg->cfs_bandwidth;
> +}
> +
> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> + unsigned long delta_exec);
> +#endif
> +
>
> /**************************************************************
> * Scheduling class tree data structure manipulation methods:
> @@ -547,6 +557,9 @@ static void update_curr(struct cfs_rq *c
> cpuacct_charge(curtask, delta_exec);
> account_group_exec_runtime(curtask, delta_exec);
> }
> +#ifdef CONFIG_CFS_BANDWIDTH
> + account_cfs_rq_quota(cfs_rq, delta_exec);
> +#endif
> }
>
> static inline void
> @@ -1130,6 +1143,43 @@ static void yield_task_fair(struct rq *r
> }
>
> #ifdef CONFIG_CFS_BANDWIDTH
> +static u64 tg_request_cfs_quota(struct task_group *tg)
> +{
> + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
> + u64 delta = 0;
> +
> + if (cfs_b->runtime > 0 || cfs_b->quota == RUNTIME_INF) {
Quick question: for cfs_b->quota == RUNTIME_INF, won't cfs_b->runtime
always be > 0?
> + raw_spin_lock(&cfs_b->lock);
> + /*
> + * it's possible a bandwidth update has changed the global
> + * pool.
> + */
> + if (cfs_b->quota == RUNTIME_INF)
> + delta = sched_cfs_bandwidth_slice();
> + else {
> + delta = min(cfs_b->runtime,
> + sched_cfs_bandwidth_slice());
> + cfs_b->runtime -= delta;
> + }
> + raw_spin_unlock(&cfs_b->lock);
> + }
> + return delta;
> +}
> +
> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> + unsigned long delta_exec)
> +{
> + if (cfs_rq->quota_assigned == RUNTIME_INF)
> + return;
> +
> + cfs_rq->quota_used += delta_exec;
> +
> + if (cfs_rq->quota_used < cfs_rq->quota_assigned)
> + return;
> +
> + cfs_rq->quota_assigned += tg_request_cfs_quota(cfs_rq->tg);
> +}
> +
> static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
> {
> return 1;
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -384,6 +384,16 @@ static struct ctl_table kern_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec,
> },
> +#ifdef CONFIG_CFS_BANDWIDTH
> + {
> + .procname = "sched_cfs_bandwidth_slice_us",
> + .data = &sysctl_sched_cfs_bandwidth_slice,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &one,
> + },
> +#endif
> #ifdef CONFIG_PROVE_LOCKING
> {
> .procname = "prove_locking",
--
Three Cheers,
Balbir
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists