Reduce the amount of stack used in build_sched_domains() by allocating
all the masks at once and setting up individual pointers into that
allocation.  If NR_CPUS <= 128, stack space is used instead.

Based on linux-2.6.25-rc5-mm1

Cc: Ingo Molnar
Signed-off-by: Mike Travis
---
One checkpatch warning that I'm not sure how to remove:

ERROR: Macros with complex values should be enclosed in parenthesis

#61: FILE: kernel/sched.c:6656:
+#define SCHED_CPU_VAR(v, a) cpumask_t *v = (cpumask_t *) \
		((unsigned long)(a) + offsetof(struct allmasks, v))

A standalone sketch of the offsetof() pointer trick behind
SCHED_CPUMASK_VAR follows the patch.
---
 kernel/sched.c |  167 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 113 insertions(+), 54 deletions(-)

--- linux-2.6.25-rc5.orig/kernel/sched.c
+++ linux-2.6.25-rc5/kernel/sched.c
@@ -6625,6 +6625,40 @@ SD_INIT_FUNC(CPU)
 #endif
 
 /*
+ * To minimize stack usage kmalloc room for cpumasks and share the
+ * space as the usage in build_sched_domains() dictates.  Used only
+ * if the amount of space is significant.
+ */
+struct allmasks {
+	cpumask_t tmpmask;			/* make this one first */
+	union {
+		cpumask_t nodemask;
+		cpumask_t this_sibling_map;
+		cpumask_t this_core_map;
+	};
+	cpumask_t send_covered;
+
+#ifdef CONFIG_NUMA
+	cpumask_t domainspan;
+	cpumask_t covered;
+	cpumask_t notcovered;
+#endif
+};
+
+#if NR_CPUS > 128
+#define SCHED_CPUMASK_ALLOC		1
+#define SCHED_CPUMASK_FREE(v)		kfree(v)
+#define SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+#else
+#define SCHED_CPUMASK_ALLOC		0
+#define SCHED_CPUMASK_FREE(v)
+#define SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+#endif
+
+#define SCHED_CPUMASK_VAR(v, a)		cpumask_t *v = (cpumask_t *) \
+			((unsigned long)(a) + offsetof(struct allmasks, v))
+
+/*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
@@ -6632,7 +6666,8 @@ static int build_sched_domains(const cpu
 {
 	int i;
 	struct root_domain *rd;
-	cpumask_t tmpmask;
+	SCHED_CPUMASK_DECLARE(allmasks);
+	cpumask_t *tmpmask;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
@@ -6657,6 +6692,21 @@ static int build_sched_domains(const cpu
 		return -ENOMEM;
 	}
 
+#if SCHED_CPUMASK_ALLOC
+	/* get space for all scratch cpumask variables */
+	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+	if (!allmasks) {
+		printk(KERN_WARNING "Cannot alloc cpumask array\n");
+		kfree(rd);
+#ifdef CONFIG_NUMA
+		kfree(sched_group_nodes);
+#endif
+		return -ENOMEM;
+	}
+#endif
+	tmpmask = (cpumask_t *)allmasks;
+
+
 #ifdef CONFIG_NUMA
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
 #endif
@@ -6666,18 +6716,18 @@ static int build_sched_domains(const cpu
 	 */
 	for_each_cpu_mask(i, *cpu_map) {
 		struct sched_domain *sd = NULL, *p;
-		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
+		SCHED_CPUMASK_VAR(nodemask, allmasks);
 
-		cpus_and(nodemask, nodemask, *cpu_map);
+		*nodemask = node_to_cpumask(cpu_to_node(i));
+		cpus_and(*nodemask, *nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
 		if (cpus_weight(*cpu_map) >
-			SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+			SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			sd->span = *cpu_map;
-			cpu_to_allnodes_group(i, cpu_map, &sd->groups,
-						&tmpmask);
+			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
 		} else
@@ -6695,11 +6745,11 @@ static int build_sched_domains(const cpu
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		SD_INIT(sd, CPU);
-		sd->span = nodemask;
+		sd->span = *nodemask;
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		cpu_to_phys_group(i, cpu_map, &sd->groups, &tmpmask);
+		cpu_to_phys_group(i, cpu_map, &sd->groups, tmpmask);
 
 #ifdef CONFIG_SCHED_MC
 		p = sd;
@@ -6709,7 +6759,7 @@ static int build_sched_domains(const cpu
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
-		cpu_to_core_group(i, cpu_map, &sd->groups, &tmpmask);
+		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
@@ -6720,81 +6770,88 @@ static int build_sched_domains(const cpu
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
-		cpu_to_cpu_group(i, cpu_map, &sd->groups, &tmpmask);
+		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
 #endif
 	}
 
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i);
-		cpumask_t send_covered;
+		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
+		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
-		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
-		if (i != first_cpu(this_sibling_map))
+		*this_sibling_map = per_cpu(cpu_sibling_map, i);
+		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
+		if (i != first_cpu(*this_sibling_map))
 			continue;
 
-		init_sched_build_groups(&this_sibling_map, cpu_map,
-					&cpu_to_cpu_group,
-					&send_covered, &tmpmask);
+		init_sched_build_groups(this_sibling_map, cpu_map,
+					cpu_to_cpu_group,
+					send_covered, tmpmask);
 	}
 #endif
 
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_core_map = cpu_coregroup_map(i);
-		cpumask_t send_covered;
+		SCHED_CPUMASK_VAR(this_core_map, allmasks);
+		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
-		cpus_and(this_core_map, this_core_map, *cpu_map);
-		if (i != first_cpu(this_core_map))
+		*this_core_map = cpu_coregroup_map(i);
+		cpus_and(*this_core_map, *this_core_map, *cpu_map);
+		if (i != first_cpu(*this_core_map))
 			continue;
-		init_sched_build_groups(&this_core_map, cpu_map,
-					&cpu_to_core_group,
-					&send_covered, &tmpmask);
+
+		init_sched_build_groups(this_core_map, cpu_map,
+					&cpu_to_core_group,
+					send_covered, tmpmask);
 	}
 #endif
 
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t send_covered;
+		SCHED_CPUMASK_VAR(nodemask, allmasks);
+		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
+		*nodemask = node_to_cpumask(i);
+		cpus_and(*nodemask, *nodemask, *cpu_map);
+		if (cpus_empty(*nodemask))
 			continue;
 
-		init_sched_build_groups(&nodemask, cpu_map,
+		init_sched_build_groups(nodemask, cpu_map,
 					&cpu_to_phys_group,
-					&send_covered, &tmpmask);
+					send_covered, tmpmask);
 	}
 
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sd_allnodes) {
-		cpumask_t send_covered;
+		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
 		init_sched_build_groups(cpu_map, cpu_map,
 					&cpu_to_allnodes_group,
-					&send_covered, &tmpmask);
+					send_covered, tmpmask);
 	}
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t domainspan;
-		cpumask_t covered = CPU_MASK_NONE;
+		SCHED_CPUMASK_VAR(nodemask, allmasks);
+		SCHED_CPUMASK_VAR(domainspan, allmasks);
+		SCHED_CPUMASK_VAR(covered, allmasks);
 		int j;
 
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask)) {
+		*nodemask = node_to_cpumask(i);
+		*covered = CPU_MASK_NONE;
+
+		cpus_and(*nodemask, *nodemask, *cpu_map);
+		if (cpus_empty(*nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
 		}
 
-		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, *cpu_map);
+		*domainspan = sched_domain_node_span(i);
+		cpus_and(*domainspan, *domainspan, *cpu_map);
 
 		sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
 		if (!sg) {
@@ -6803,31 +6860,31 @@ static int build_sched_domains(const cpu
 			goto error;
 		}
 		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, nodemask) {
+		for_each_cpu_mask(j, *nodemask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(node_domains, j);
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
-		sg->cpumask = nodemask;
+		sg->cpumask = *nodemask;
 		sg->next = sg;
-		cpus_or(covered, covered, nodemask);
+		cpus_or(*covered, *covered, *nodemask);
 		prev = sg;
 
 		for (j = 0; j < MAX_NUMNODES; j++) {
-			cpumask_t tmp, notcovered;
+			SCHED_CPUMASK_VAR(notcovered, allmasks);
 			int n = (i + j) % MAX_NUMNODES;
-			node_to_cpumask_ptr(nodemask, n);
 
-			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, *cpu_map);
-			cpus_and(tmp, tmp, domainspan);
-			if (cpus_empty(tmp))
+			cpus_complement(*notcovered, *covered);
+			cpus_and(*tmpmask, *notcovered, *cpu_map);
+			cpus_and(*tmpmask, *tmpmask, *domainspan);
+			if (cpus_empty(*tmpmask))
 				break;
 
-			cpus_and(tmp, tmp, *nodemask);
-			if (cpus_empty(tmp))
+			*nodemask = node_to_cpumask(n);
+			cpus_and(*tmpmask, *tmpmask, *nodemask);
+			if (cpus_empty(*tmpmask))
 				continue;
 
 			sg = kmalloc_node(sizeof(struct sched_group),
@@ -6838,9 +6895,9 @@ static int build_sched_domains(const cpu
 				goto error;
 			}
 			sg->__cpu_power = 0;
-			sg->cpumask = tmp;
+			sg->cpumask = *tmpmask;
 			sg->next = prev->next;
-			cpus_or(covered, covered, tmp);
+			cpus_or(*covered, *covered, *tmpmask);
 			prev->next = sg;
 			prev = sg;
 		}
@@ -6877,7 +6934,7 @@ static int build_sched_domains(const cpu
 		struct sched_group *sg;
 
 		cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
-								&tmpmask);
+								tmpmask);
 		init_numa_sched_groups_power(sg);
 	}
 #endif
@@ -6895,11 +6952,13 @@ static int build_sched_domains(const cpu
 		cpu_attach_domain(sd, rd, i);
 	}
 
+	SCHED_CPUMASK_FREE((void *)allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
-	free_sched_groups(cpu_map, &tmpmask);
+	free_sched_groups(cpu_map, tmpmask);
+	SCHED_CPUMASK_FREE((void *)allmasks);
 	return -ENOMEM;
 #endif
 }
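
For readers unfamiliar with the trick, here is a minimal standalone
userspace sketch of the pointer-carving idea behind SCHED_CPUMASK_VAR:
one heap allocation holds every scratch mask, and each named "variable"
is just a pointer computed with offsetof() into that block.  The names
used here (mask_t, struct scratch_masks, SCRATCH_VAR) are illustrative
stand-ins rather than kernel APIs, and malloc() stands in for kmalloc().

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for cpumask_t: a fixed-size bitmap. */
typedef struct { unsigned long bits[4]; } mask_t;

/* One allocation that holds every scratch mask, like struct allmasks. */
struct scratch_masks {
	mask_t tmpmask;
	mask_t nodemask;
	mask_t send_covered;
};

/*
 * Same shape as SCHED_CPUMASK_VAR in the patch: declare a pointer named
 * 'v' that points at the member of the same name inside allocation 'a'.
 * Like the kernel macro, it assumes unsigned long can hold a pointer.
 */
#define SCRATCH_VAR(v, a) mask_t *v = (mask_t *) \
		((unsigned long)(a) + offsetof(struct scratch_masks, v))

int main(void)
{
	struct scratch_masks *allmasks = malloc(sizeof(*allmasks));

	if (!allmasks)
		return 1;
	memset(allmasks, 0, sizeof(*allmasks));

	{
		/* No stack-resident masks: both names alias the heap block. */
		SCRATCH_VAR(nodemask, allmasks);
		SCRATCH_VAR(send_covered, allmasks);

		nodemask->bits[0] = 0xf;
		send_covered->bits[0] = 0x3;

		printf("nodemask offset %zu, send_covered offset %zu\n",
		       (size_t)((char *)nodemask - (char *)allmasks),
		       (size_t)((char *)send_covered - (char *)allmasks));
	}

	free(allmasks);
	return 0;
}

Because the macro reuses its argument both as the pointer name and as
the member name, it only works when the local name matches a field of
the struct.  That is also why struct allmasks can overlay nodemask,
this_sibling_map and this_core_map in a union: those masks are never
live at the same time in build_sched_domains().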