[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMZfGtW=y=SESn_M7TC_N=hAVDQJXJkSSswmcy68XKw_KqDD2w@mail.gmail.com>
Date: Tue, 12 Jan 2021 11:24:05 +0800
From: Muchun Song <songmuchun@...edance.com>
To: Mike Kravetz <mike.kravetz@...cle.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
Linux Memory Management List <linux-mm@...ck.org>,
Michal Hocko <mhocko@...nel.org>,
Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
David Hildenbrand <david@...hat.com>,
Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [External] [RFC PATCH 1/3] hugetlb: use page.private for hugetlb
specific page flags
On Tue, Jan 12, 2021 at 5:04 AM Mike Kravetz <mike.kravetz@...cle.com> wrote:
>
> As hugetlbfs evolved, state information about hugetlb pages was added.
> One 'convenient' way of doing this was to use available fields in tail
> pages. Over time, it has become difficult to know the meaning or contents
> of fields simply be looking at a small bit of code. Sometimes, the
> naming is just confusing. For example: The PagePrivate flag indicates
> a huge page reservation was consumed and needs to be restored if an error
> is encountered and the page is freed before it is instantiated. The
> page.private field contains the pointer to a subpool if the page is
> associated with one.
>
> In an effort to make the code more readable, use page.private to contain
> hugetlb specific flags. These flags will have test, set and clear functions
> similar to those used for 'normal' page flags. More importantly, the
> flags will have names which actually reflect their purpose.
>
> In this patch,
> - Create infrastructure for huge page flag functions
> - Move subpool pointer to page[1].private to make way for flags
> Create routines with meaningful names to modify subpool field
> - Use new HPageRestoreReserve reserve flag instead of PagePrivate
>
> Conversion of other state information will happen in subsequent patches.
Great. We can add more meta information easily in the feature.
And it also makes the code more readable. Thanks.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@...cle.com>
> ---
> fs/hugetlbfs/inode.c | 11 +++---
> include/linux/hugetlb.h | 2 ++
> mm/hugetlb.c | 80 +++++++++++++++++++++++++++++++----------
> 3 files changed, 67 insertions(+), 26 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index b5c109703daa..8bfb80bc6e96 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -966,14 +966,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
> return rc;
>
> /*
> - * page_private is subpool pointer in hugetlb pages. Transfer to
> - * new page. PagePrivate is not associated with page_private for
> - * hugetlb pages and can not be set here as only page_huge_active
> - * pages can be migrated.
> + * Transfer any subpool pointer to the new page.
> */
> - if (page_private(page)) {
> - set_page_private(newpage, page_private(page));
> - set_page_private(page, 0);
> + if (hpage_spool(page)) {
> + set_hpage_spool(newpage, hpage_spool(page));
> + set_hpage_spool(page, 0);
> }
>
> if (mode != MIGRATE_SYNC_NO_COPY)
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ebca2ef02212..4f0159f1b9cc 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -104,6 +104,8 @@ extern int hugetlb_max_hstate __read_mostly;
> struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
> long min_hpages);
> void hugepage_put_subpool(struct hugepage_subpool *spool);
> +struct hugepage_subpool *hpage_spool(struct page *hpage);
> +void set_hpage_spool(struct page *hpage, struct hugepage_subpool *spool);
>
> void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
> int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 3b38ea958e95..3eb3b102c589 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -52,6 +52,49 @@ static struct cma *hugetlb_cma[MAX_NUMNODES];
> #endif
> static unsigned long hugetlb_cma_size __initdata;
>
> +/*
> + * hugetlb specific state flags located in hpage.private
> + */
> +enum htlb_page_flags {
> + HPAGE_RestoreReserve = 0,
IMHO, can we rename it to HPG_restore_reserve? I just want to
make the name consistent with the page flags (see enum pageflags
in include/linux/page-flags.h).
May we also should add a __NR_HPAGEFLAGS to indicate
how many bits that we already used. And add a BUILD_BUG_ON
to check whether the used bits exceed sizeof(unsigned long).
Although it is not necessary now. If we can check it, I think it
would be better.
> +};
> +
> +/*
> + * Macros to create function definitions for hpage flags
> + */
> +#define TESTHPAGEFLAG(flname) \
> +static inline int HPage##flname(struct page *page) \
> + { return test_bit(HPAGE_##flname, &(page->private)); }
> +
> +#define SETHPAGEFLAG(flname) \
> +static inline void SetHPage##flname(struct page *page) \
> + { set_bit(HPAGE_##flname, &(page->private)); }
> +
> +#define CLEARHPAGEFLAG(flname) \
> +static inline void ClearHPage##flname(struct page *page) \
> + { clear_bit(HPAGE_##flname, &(page->private)); }
> +
> +#define HPAGEFLAG(flname) \
> + TESTHPAGEFLAG(flname) \
> + SETHPAGEFLAG(flname) \
> + CLEARHPAGEFLAG(flname)
> +
> +HPAGEFLAG(RestoreReserve)
> +
> +/*
> + * hugetlb page subpool pointer located in hpage[1].private
> + */
> +struct hugepage_subpool *hpage_spool(struct page *hpage)
> +{
> + return (struct hugepage_subpool *)(hpage+1)->private;
> +}
> +
> +void set_hpage_spool(struct page *hpage,
> + struct hugepage_subpool *spool)
> +{
> + set_page_private(hpage+1, (unsigned long)spool);
> +}
> +
> /*
> * Minimum page order among possible hugepage sizes, set to a proper value
> * at boot time.
> @@ -1116,7 +1159,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> - SetPagePrivate(page);
> + SetHPageRestoreReserve(page);
> h->resv_huge_pages--;
> }
>
> @@ -1391,20 +1434,19 @@ static void __free_huge_page(struct page *page)
> */
> struct hstate *h = page_hstate(page);
> int nid = page_to_nid(page);
> - struct hugepage_subpool *spool =
> - (struct hugepage_subpool *)page_private(page);
> + struct hugepage_subpool *spool = hpage_spool(page);
> bool restore_reserve;
>
> VM_BUG_ON_PAGE(page_count(page), page);
> VM_BUG_ON_PAGE(page_mapcount(page), page);
>
> - set_page_private(page, 0);
> + set_hpage_spool(page, 0);
> page->mapping = NULL;
> - restore_reserve = PagePrivate(page);
> - ClearPagePrivate(page);
> + restore_reserve = HPageRestoreReserve(page);
> + ClearHPageRestoreReserve(page);
>
> /*
> - * If PagePrivate() was set on page, page allocation consumed a
> + * If RestoreReserve was set on page, page allocation consumed a
> * reservation. If the page was associated with a subpool, there
> * would have been a page reserved in the subpool before allocation
> * via hugepage_subpool_get_pages(). Since we are 'restoring' the
> @@ -2213,9 +2255,9 @@ static long vma_add_reservation(struct hstate *h,
> * This routine is called to restore a reservation on error paths. In the
> * specific error paths, a huge page was allocated (via alloc_huge_page)
> * and is about to be freed. If a reservation for the page existed,
> - * alloc_huge_page would have consumed the reservation and set PagePrivate
> + * alloc_huge_page would have consumed the reservation and set RestoreReserve
> * in the newly allocated page. When the page is freed via free_huge_page,
> - * the global reservation count will be incremented if PagePrivate is set.
> + * the global reservation count will be incremented if RestoreReserve is set.
> * However, free_huge_page can not adjust the reserve map. Adjust the
> * reserve map here to be consistent with global reserve count adjustments
> * to be made by free_huge_page.
> @@ -2224,13 +2266,13 @@ static void restore_reserve_on_error(struct hstate *h,
> struct vm_area_struct *vma, unsigned long address,
> struct page *page)
> {
> - if (unlikely(PagePrivate(page))) {
> + if (unlikely(HPageRestoreReserve(page))) {
> long rc = vma_needs_reservation(h, vma, address);
>
> if (unlikely(rc < 0)) {
> /*
> * Rare out of memory condition in reserve map
> - * manipulation. Clear PagePrivate so that
> + * manipulation. Clear RestoreReserve so that
> * global reserve count will not be incremented
> * by free_huge_page. This will make it appear
> * as though the reservation for this page was
> @@ -2239,7 +2281,7 @@ static void restore_reserve_on_error(struct hstate *h,
> * is better than inconsistent global huge page
> * accounting of reserve counts.
> */
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> } else if (rc) {
> rc = vma_add_reservation(h, vma, address);
> if (unlikely(rc < 0))
> @@ -2247,7 +2289,7 @@ static void restore_reserve_on_error(struct hstate *h,
> * See above comment about rare out of
> * memory condition.
> */
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> } else
> vma_end_reservation(h, vma, address);
> }
> @@ -2328,7 +2370,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
> if (!page)
> goto out_uncharge_cgroup;
> if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
> - SetPagePrivate(page);
> + SetHPageRestoreReserve(page);
> h->resv_huge_pages--;
> }
> spin_lock(&hugetlb_lock);
> @@ -2346,7 +2388,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>
> spin_unlock(&hugetlb_lock);
>
> - set_page_private(page, (unsigned long)spool);
> + set_hpage_spool(page, spool);
>
> map_commit = vma_commit_reservation(h, vma, addr);
> if (unlikely(map_chg > map_commit)) {
> @@ -4150,7 +4192,7 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
> spin_lock(ptl);
> ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
> if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
> - ClearPagePrivate(new_page);
> + ClearHPageRestoreReserve(new_page);
>
> /* Break COW */
> huge_ptep_clear_flush(vma, haddr, ptep);
> @@ -4217,7 +4259,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
>
> if (err)
> return err;
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
>
> /*
> * set page dirty so that it will not be removed from cache/file
> @@ -4379,7 +4421,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
> goto backout;
>
> if (anon_rmap) {
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> hugepage_add_new_anon_rmap(page, vma, haddr);
> } else
> page_dup_rmap(page, true);
> @@ -4693,7 +4735,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> if (vm_shared) {
> page_dup_rmap(page, true);
> } else {
> - ClearPagePrivate(page);
> + ClearHPageRestoreReserve(page);
> hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
> }
>
> --
> 2.29.2
>
Powered by blists - more mailing lists