[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200609132438.GE22623@dhcp22.suse.cz>
Date: Tue, 9 Jun 2020 15:24:38 +0200
From: Michal Hocko <mhocko@...nel.org>
To: js1304@...il.com
Cc: Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, kernel-team@....com,
Vlastimil Babka <vbabka@...e.cz>,
Christoph Hellwig <hch@...radead.org>,
Roman Gushchin <guro@...com>,
Mike Kravetz <mike.kravetz@...cle.com>,
Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
Joonsoo Kim <iamjoonsoo.kim@....com>
Subject: Re: [PATCH v2 03/12] mm/hugetlb: introduce alloc_control structure
to simplify migration target allocation APIs
On Wed 27-05-20 15:44:54, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@....com>
>
> Currently, page allocation functions for migration requires some arguments.
> More worse, in the following patch, more argument will be needed to unify
> the similar functions. To simplify them, in this patch, unified data
> structure that controls allocation behaviour is introduced.
>
> For clean-up, function declarations are re-ordered.
This is really hard to review without having a clear picture of the
resulting code so bear with me. I can see some reasons why allocation
callbacks might benefit from a agragated argument but you seem to touch
the internal hugetlb dequeue_huge_page_vma which shouldn't really need
that. I wouldn't mind much but I remember the hugetlb allocation
functions layering is quite complex for hugetlb specific reasons (see
0c397daea1d4 ("mm, hugetlb: further simplify hugetlb allocation API")
for more background).
Is there any reason why the agregated argument cannot be limited only to
migration callbacks. That would be alloc_huge_page_node, alloc_huge_page_nodemask
and alloc_huge_page_vma.
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@....com>
> ---
> include/linux/hugetlb.h | 35 +++++++++++++++-------------
> mm/gup.c | 11 ++++++---
> mm/hugetlb.c | 62 ++++++++++++++++++++++++-------------------------
> mm/internal.h | 7 ++++++
> mm/mempolicy.c | 13 +++++++----
> mm/migrate.c | 13 +++++++----
> 6 files changed, 83 insertions(+), 58 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 50650d0..15c8fb8 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -14,6 +14,7 @@
> struct ctl_table;
> struct user_struct;
> struct mmu_gather;
> +struct alloc_control;
>
> #ifndef is_hugepd
> typedef struct { unsigned long pd; } hugepd_t;
> @@ -502,15 +503,16 @@ struct huge_bootmem_page {
> struct hstate *hstate;
> };
>
> -struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr, int avoid_reserve);
> -struct page *alloc_huge_page_node(struct hstate *h, int nid);
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask);
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address);
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask);
> +struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr, int avoid_reserve);
> int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
> pgoff_t idx);
>
> @@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
> #else /* CONFIG_HUGETLB_PAGE */
> struct hstate {};
>
> -static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr,
> - int avoid_reserve)
> -{
> - return NULL;
> -}
> -
> -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +static inline struct page *
> +alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
>
> static inline struct page *
> -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
> +alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
> @@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
> return NULL;
> }
>
> +static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr,
> + int avoid_reserve)
> +{
> + return NULL;
> +}
> +
> static inline int __alloc_bootmem_huge_page(struct hstate *h)
> {
> return 0;
> diff --git a/mm/gup.c b/mm/gup.c
> index ee039d4..6b78f11 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1612,16 +1612,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
> if (PageHighMem(page))
> gfp_mask |= __GFP_HIGHMEM;
>
> -#ifdef CONFIG_HUGETLB_PAGE
> if (PageHuge(page)) {
> struct hstate *h = page_hstate(page);
> + struct alloc_control ac = {
> + .nid = nid,
> + .nmask = NULL,
> + .gfp_mask = gfp_mask,
> + };
> +
> /*
> * We don't want to dequeue from the pool because pool pages will
> * mostly be from the CMA region.
> */
> - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + return alloc_migrate_huge_page(h, &ac);
> }
> -#endif
> +
> if (PageTransHuge(page)) {
> struct page *thp;
> /*
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 57ece74..453ba94 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1053,8 +1053,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> return page;
> }
>
> -static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
> - nodemask_t *nmask)
> +static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> unsigned int cpuset_mems_cookie;
> struct zonelist *zonelist;
> @@ -1062,14 +1062,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
> struct zoneref *z;
> int node = NUMA_NO_NODE;
>
> - zonelist = node_zonelist(nid, gfp_mask);
> + zonelist = node_zonelist(ac->nid, ac->gfp_mask);
>
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
> - for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
> + for_each_zone_zonelist_nodemask(zone, z, zonelist,
> + gfp_zone(ac->gfp_mask), ac->nmask) {
> struct page *page;
>
> - if (!cpuset_zone_allowed(zone, gfp_mask))
> + if (!cpuset_zone_allowed(zone, ac->gfp_mask))
> continue;
> /*
> * no need to ask again on the same node. Pool is node rather than
> @@ -1105,9 +1106,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> {
> struct page *page;
> struct mempolicy *mpol;
> - gfp_t gfp_mask;
> - nodemask_t *nodemask;
> - int nid;
> + struct alloc_control ac = {0};
>
> /*
> * A child process with MAP_PRIVATE mappings created by their parent
> @@ -1122,9 +1121,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
> goto err;
>
> - gfp_mask = htlb_alloc_mask(h);
> - nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> +
> + page = dequeue_huge_page_nodemask(h, &ac);
> if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> SetPagePrivate(page);
> h->resv_huge_pages--;
> @@ -1937,15 +1937,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask)
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac)
> {
> struct page *page;
>
> if (hstate_is_gigantic(h))
> return NULL;
>
> - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
> + page = alloc_fresh_huge_page(h, ac->gfp_mask,
> + ac->nid, ac->nmask, NULL);
> if (!page)
> return NULL;
>
> @@ -1979,36 +1980,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> struct page *page = NULL;
>
> - if (nid != NUMA_NO_NODE)
> - gfp_mask |= __GFP_THISNODE;
> + ac->gfp_mask = htlb_alloc_mask(h);
> + if (ac->nid != NUMA_NO_NODE)
> + ac->gfp_mask |= __GFP_THISNODE;
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0)
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
> + page = dequeue_huge_page_nodemask(h, ac);
> spin_unlock(&hugetlb_lock);
>
> if (!page)
> - page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + page = alloc_migrate_huge_page(h, ac);
>
> return page;
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask)
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> + ac->gfp_mask = htlb_alloc_mask(h);
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0) {
> struct page *page;
>
> - page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
> + page = dequeue_huge_page_nodemask(h, ac);
> if (page) {
> spin_unlock(&hugetlb_lock);
> return page;
> @@ -2016,22 +2018,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> }
> spin_unlock(&hugetlb_lock);
>
> - return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
> + return alloc_migrate_huge_page(h, ac);
> }
>
> /* mempolicy aware migration callback */
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address)
> {
> + struct alloc_control ac = {0};
> struct mempolicy *mpol;
> - nodemask_t *nodemask;
> struct page *page;
> - gfp_t gfp_mask;
> - int node;
>
> - gfp_mask = htlb_alloc_mask(h);
> - node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = alloc_huge_page_nodemask(h, node, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> + page = alloc_huge_page_nodemask(h, &ac);
> mpol_cond_put(mpol);
>
> return page;
> diff --git a/mm/internal.h b/mm/internal.h
> index 9886db2..6e613ce 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
>
> void setup_zone_pageset(struct zone *zone);
> extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
> +
> +struct alloc_control {
> + int nid; /* preferred node id */
> + nodemask_t *nmask;
> + gfp_t gfp_mask;
> +};
> +
> #endif /* __MM_INTERNAL_H */
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 3813206..3b6b551 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
> /* page allocation callback for NUMA node migration */
> struct page *alloc_new_node_page(struct page *page, unsigned long node)
> {
> - if (PageHuge(page))
> - return alloc_huge_page_node(page_hstate(compound_head(page)),
> - node);
> - else if (PageTransHuge(page)) {
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(compound_head(page));
> + struct alloc_control ac = {
> + .nid = node,
> + .nmask = NULL,
> + };
> +
> + return alloc_huge_page_node(h, &ac);
> + } else if (PageTransHuge(page)) {
> struct page *thp;
>
> thp = alloc_pages_node(node,
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 824c22e..30217537 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1544,10 +1544,15 @@ struct page *new_page_nodemask(struct page *page,
> unsigned int order = 0;
> struct page *new_page = NULL;
>
> - if (PageHuge(page))
> - return alloc_huge_page_nodemask(
> - page_hstate(compound_head(page)),
> - preferred_nid, nodemask);
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(compound_head(page));
> + struct alloc_control ac = {
> + .nid = preferred_nid,
> + .nmask = nodemask,
> + };
> +
> + return alloc_huge_page_nodemask(h, &ac);
> + }
>
> if (PageTransHuge(page)) {
> gfp_mask |= GFP_TRANSHUGE;
> --
> 2.7.4
>
--
Michal Hocko
SUSE Labs
Powered by blists - more mailing lists