[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080318143438.GE23866@csn.ul.ie>
Date: Tue, 18 Mar 2008 14:34:38 +0000
From: Mel Gorman <mel@....ul.ie>
To: Andi Kleen <andi@...stfloor.org>
Cc: linux-kernel@...r.kernel.org, pj@....com, linux-mm@...ck.org,
nickpiggin@...oo.com.au
Subject: Re: [PATCH] [5/18] Expand the hugetlbfs sysctls to handle arrays for all hstates
On (17/03/08 02:58), Andi Kleen didst pronounce:
> - I didn't bother with hugetlb_shm_group and treat_as_movable,
> these are still single global.
I cannot imagine why either of those would be per-pool anyway.
Potentially shm_group could become a per-mount value which is both
outside the scope of this patchset and not per-pool so unsuitable for
hstate.
> - Also improve error propagation for the sysctl handlers a bit
>
>
> Signed-off-by: Andi Kleen <ak@...e.de>
>
> ---
> include/linux/hugetlb.h | 5 +++--
> kernel/sysctl.c | 2 +-
> mm/hugetlb.c | 43 +++++++++++++++++++++++++++++++------------
> 3 files changed, 35 insertions(+), 15 deletions(-)
>
> Index: linux/include/linux/hugetlb.h
> ===================================================================
> --- linux.orig/include/linux/hugetlb.h
> +++ linux/include/linux/hugetlb.h
> @@ -32,8 +32,6 @@ int hugetlb_fault(struct mm_struct *mm,
> int hugetlb_reserve_pages(struct inode *inode, long from, long to);
> void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
>
> -extern unsigned long max_huge_pages;
> -extern unsigned long sysctl_overcommit_huge_pages;
> extern unsigned long hugepages_treat_as_movable;
> extern const unsigned long hugetlb_zero, hugetlb_infinity;
> extern int sysctl_hugetlb_shm_group;
> @@ -258,6 +256,9 @@ static inline unsigned huge_page_shift(s
> return h->order + PAGE_SHIFT;
> }
>
> +extern unsigned long max_huge_pages[HUGE_MAX_HSTATE];
> +extern unsigned long sysctl_overcommit_huge_pages[HUGE_MAX_HSTATE];
Any particular reason for moving them?
Also, offhand it's not super-clear why max_huge_pages is not part of
hstate as we only expect one hstate per pagesize anyway.
> +
> #else
> struct hstate {};
> #define hstate_file(f) NULL
> Index: linux/kernel/sysctl.c
> ===================================================================
> --- linux.orig/kernel/sysctl.c
> +++ linux/kernel/sysctl.c
> @@ -935,7 +935,7 @@ static struct ctl_table vm_table[] = {
> {
> .procname = "nr_hugepages",
> .data = &max_huge_pages,
> - .maxlen = sizeof(unsigned long),
> + .maxlen = sizeof(max_huge_pages),
> .mode = 0644,
> .proc_handler = &hugetlb_sysctl_handler,
> .extra1 = (void *)&hugetlb_zero,
> Index: linux/mm/hugetlb.c
> ===================================================================
> --- linux.orig/mm/hugetlb.c
> +++ linux/mm/hugetlb.c
> @@ -22,8 +22,8 @@
> #include "internal.h"
>
> const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
> -unsigned long max_huge_pages;
> -unsigned long sysctl_overcommit_huge_pages;
> +unsigned long max_huge_pages[HUGE_MAX_HSTATE];
> +unsigned long sysctl_overcommit_huge_pages[HUGE_MAX_HSTATE];
> static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
> unsigned long hugepages_treat_as_movable;
>
> @@ -496,11 +496,11 @@ static int __init hugetlb_init_hstate(st
>
> h->hugetlb_next_nid = first_node(node_online_map);
>
> - for (i = 0; i < max_huge_pages; ++i) {
> + for (i = 0; i < max_huge_pages[h - hstates]; ++i) {
> if (!alloc_fresh_huge_page(h))
> break;
> }
> - max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
> + max_huge_pages[h - hstates] = h->free_huge_pages = h->nr_huge_pages = i;
>
hmm ok, it looks a little weird to be working out h - hstates multiple times
in a loop when it is invariant but functionally, it's fine.
> printk(KERN_INFO "Total HugeTLB memory allocated, %ld %dMB pages\n",
> h->free_huge_pages,
> @@ -531,8 +531,9 @@ void __init huge_add_hstate(unsigned ord
>
> static int __init hugetlb_setup(char *s)
> {
> - if (sscanf(s, "%lu", &max_huge_pages) <= 0)
> - max_huge_pages = 0;
> + unsigned long *mhp = &max_huge_pages[parsed_hstate - hstates];
This looks like we are assuming there is only ever one other
parsed_hstate. For the purposes of what you aim to achieve in this set,
it's not important but a comment over parsed_hstate about this
assumption is probably necessary.
> + if (sscanf(s, "%lu", mhp) <= 0)
> + *mhp = 0;
> return 1;
> }
> __setup("hugepages=", hugetlb_setup);
> @@ -584,10 +585,12 @@ static inline void try_to_free_low(unsig
> #endif
>
> #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
> -static unsigned long set_max_huge_pages(unsigned long count)
> +static unsigned long
> +set_max_huge_pages(struct hstate *h, unsigned long count, int *err)
> {
> unsigned long min_count, ret;
> - struct hstate *h = &global_hstate;
> +
> + *err = 0;
>
What is updating err to anything else in set_max_huge_pages()?
> /*
> * Increase the pool size
> @@ -659,8 +662,20 @@ int hugetlb_sysctl_handler(struct ctl_ta
> struct file *file, void __user *buffer,
> size_t *length, loff_t *ppos)
> {
> - proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
> - max_huge_pages = set_max_huge_pages(max_huge_pages);
> + int err = 0;
> + struct hstate *h;
> + int i;
> + err = proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
> + if (err)
> + return err;
> + i = 0;
> + for_each_hstate (h) {
> + max_huge_pages[i] = set_max_huge_pages(h, max_huge_pages[i],
> + &err);
hmm, this is saying when I write 10 to nr_hugepages, I am asking for 10
2MB pages and 10 1GB pages potentially. Is that what you want?
> + if (err)
> + return err;
I'm failing to see how the error handling is improved when
set_max_huge_pages() is not updating err. Maybe it happens in another
patch.
> + i++;
> + }
> return 0;
> }
>
> @@ -680,10 +695,14 @@ int hugetlb_overcommit_handler(struct ct
> struct file *file, void __user *buffer,
> size_t *length, loff_t *ppos)
> {
> - struct hstate *h = &global_hstate;
> + struct hstate *h;
> + int i = 0;
> proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
> spin_lock(&hugetlb_lock);
> - h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
> + for_each_hstate (h) {
> + h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages[i];
> + i++;
> + }
Similar to the other sysctl here, the overcommit value is being set for
all the huge page sizes.
> spin_unlock(&hugetlb_lock);
> return 0;
> }
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists