[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4B0ECB45.60906@linux.vnet.ibm.com>
Date: Thu, 26 Nov 2009 19:39:01 +0100
From: Christian Ehrhardt <ehrhardt@...ux.vnet.ibm.com>
To: Peter Zijlstra <peterz@...radead.org>
CC: Ingo Molnar <mingo@...e.hu>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Holger.Wolf@...ibm.com, epasch@...ibm.com,
Martin Schwidefsky <schwidefsky@...ibm.com>
Subject: Re: Missing recalculation of scheduler tunables in case of cpu hot
add/remove
Peter Zijlstra wrote:
> On Thu, 2009-11-26 at 17:31 +0100, Christian Ehrhardt wrote:
>
>> [...]
>> The question for now is what we do on cpu hot add/remove?
>> Would hooking somewhere in kernel/cpu.c be the right approach - I'm not
>> quite sure about my own suggestion yet :-).
>>
>
> Something like the below might work I suppose, just needs a cleanup and
> such.
>
>
Looks very promising, I did not expect it would be so easy to hook up to
the hotplug events, but you're absolutely right, the scheduler already
has hooks for that with rq_online/offline.
From looking at the patch alone I expect it will lose user updates to
sysfs. Might just need adding some feedback from the sysctl writer
functions to set the default values to setval/1+ilog2; that includes
renaming default to "normalized" or something like that. But I'll test
this patch in depth tomorrow morning anyway and give more detailed feedback.
Thanks a lot!
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 0cbf2ef..210365f 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
> * default: 0.25ms
> */
> unsigned int sysctl_sched_shares_ratelimit = 250000;
> +unsigned int default_sysctl_sched_shares_ratelimit = 250000;
>
> /*
> * Inject some fuzzyness into changing the per-cpu group shares
> @@ -1810,6 +1811,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
> #endif
>
> static void calc_load_account_active(struct rq *this_rq);
> +static void update_sysctl(void);
>
> #include "sched_stats.h"
> #include "sched_idletask.c"
> @@ -7019,22 +7021,24 @@ cpumask_var_t nohz_cpu_mask;
> *
> * This idea comes from the SD scheduler of Con Kolivas:
> */
> -static inline void sched_init_granularity(void)
> +#define SET_SYSCTL(name, factor) \
> + sysctl_##name = (factor) * default_sysctl_##name
> +
> +static void update_sysctl(void)
> {
> - unsigned int factor = 1 + ilog2(num_online_cpus());
> + unsigned int cpus = max(num_active_cpus(), 8);
> + unsigned int factor = 1 + ilog2(cpus);
> const unsigned long limit = 200000000;
>
> - sysctl_sched_min_granularity *= factor;
> - if (sysctl_sched_min_granularity > limit)
> - sysctl_sched_min_granularity = limit;
> -
> - sysctl_sched_latency *= factor;
> - if (sysctl_sched_latency > limit)
> - sysctl_sched_latency = limit;
> -
> - sysctl_sched_wakeup_granularity *= factor;
> + SET_SYSCTL(sched_min_granularity);
> + SET_SYSCTL(sched_latency);
> + SET_SYSCTL(sched_wakeup_granularity);
> + SET_SYSCTL(sched_shares_ratelimit);
> +}
>
> - sysctl_sched_shares_ratelimit *= factor;
> +static inline void sched_init_granularity(void)
> +{
> + update_sysctl();
> }
>
> #ifdef CONFIG_SMP
> diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
> index 0ff21af..4d429b8 100644
> --- a/kernel/sched_fair.c
> +++ b/kernel/sched_fair.c
> @@ -35,12 +35,14 @@
> * run vmstat and monitor the context-switches (cs) field)
> */
> unsigned int sysctl_sched_latency = 5000000ULL;
> +unsigned int default_sysctl_sched_latency = 5000000ULL;
>
> /*
> * Minimal preemption granularity for CPU-bound tasks:
> * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
> */
> unsigned int sysctl_sched_min_granularity = 1000000ULL;
> +unsigned int default_sysctl_sched_min_granularity = 1000000ULL;
>
> /*
> * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
> @@ -70,6 +72,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
> * have immediate wakeup/sleep latencies.
> */
> unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
> +unsigned int default_sysctl_sched_wakeup_granularity = 1000000UL;
>
> const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
>
> @@ -1905,6 +1908,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
>
> return 0;
> }
> +
> +static void rq_online_fair(struct rq *rq)
> +{
> + update_sysctl();
> +}
> +
> +static void rq_offline_fair(struct rq *rq)
> +{
> + update_sysctl();
> +}
> +
> #endif /* CONFIG_SMP */
>
> /*
> @@ -2052,6 +2066,8 @@ static const struct sched_class fair_sched_class = {
>
> .load_balance = load_balance_fair,
> .move_one_task = move_one_task_fair,
> + .rq_online = rq_online_fair,
> + .rq_offline = rq_offline_fair,
> #endif
>
> .set_curr_task = set_curr_task_fair,
>
>
>
--
Grüsse / regards, Christian Ehrhardt
IBM Linux Technology Center, Open Virtualization
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists