Date:   Wed, 20 Jan 2021 16:10:36 +0800
From:   Miaohe Lin <linmiaohe@...wei.com>
To:     Mike Kravetz <mike.kravetz@...cle.com>
CC:     Michal Hocko <mhocko@...nel.org>,
        Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
        Muchun Song <songmuchun@...edance.com>,
        "David Hildenbrand" <david@...hat.com>,
        Oscar Salvador <osalvador@...e.de>,
        "Matthew Wilcox" <willy@...radead.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        <linux-kernel@...r.kernel.org>, <linux-mm@...ck.org>
Subject: Re: [PATCH v2 1/5] hugetlb: use page.private for hugetlb specific
 page flags

Hi:
On 2021/1/20 9:30, Mike Kravetz wrote:
> As hugetlbfs evolved, state information about hugetlb pages was added.
> One 'convenient' way of doing this was to use available fields in tail
> pages.  Over time, it has become difficult to know the meaning or contents
> of fields simply by looking at a small bit of code.  Sometimes, the
> naming is just confusing.  For example: The PagePrivate flag indicates
> a huge page reservation was consumed and needs to be restored if an error
> is encountered and the page is freed before it is instantiated.  The

This PagePrivate flag really confused me for a long time. :(

> page.private field contains the pointer to a subpool if the page is
> associated with one.
> 
> In an effort to make the code more readable, use page.private to contain
> hugetlb specific page flags.  These flags will have test, set and clear
> functions similar to those used for 'normal' page flags.  More importantly,
> an enum of flag values will be created with names that actually reflect
> their purpose.

Thanks for doing this. This would make life easier.
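
If I expand the macros below by hand, HPAGEFLAG(RestoreReserve, restore_reserve)
should generate roughly the following helpers (my own sketch of the
CONFIG_HUGETLB_PAGE case, not the literal preprocessor output):

static inline int HPageRestoreReserve(struct page *page)
{
	/* Make sure page.private has room for every hugetlb flag bit. */
	BUILD_BUG_ON(sizeof_field(struct page, private) *
			BITS_PER_BYTE < __NR_HPAGEFLAGS);
	return test_bit(HPG_restore_reserve, &(page->private));
}

static inline void SetHPageRestoreReserve(struct page *page)
{
	set_bit(HPG_restore_reserve, &(page->private));
}

static inline void ClearHPageRestoreReserve(struct page *page)
{
	clear_bit(HPG_restore_reserve, &(page->private));
}

That matches the familiar PageXXX/SetPageXXX/ClearPageXXX naming convention
nicely.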

> 
> In this patch,
> - Create infrastructure for hugetlb specific page flag functions
> - Move subpool pointer to page[1].private to make way for flags
>   Create routines with meaningful names to modify subpool field
> - Use new HPageRestoreReserve flag instead of PagePrivate
> 
> Conversion of other state information will happen in subsequent patches.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@...cle.com>
> ---
>  fs/hugetlbfs/inode.c    | 12 ++-----
>  include/linux/hugetlb.h | 72 +++++++++++++++++++++++++++++++++++++++++
>  mm/hugetlb.c            | 45 +++++++++++++-------------
>  3 files changed, 97 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 740693d7f255..b8a661780c4a 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -955,15 +955,9 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
>  	if (rc != MIGRATEPAGE_SUCCESS)
>  		return rc;
>  
> -	/*
> -	 * page_private is subpool pointer in hugetlb pages.  Transfer to
> -	 * new page.  PagePrivate is not associated with page_private for
> -	 * hugetlb pages and can not be set here as only page_huge_active
> -	 * pages can be migrated.
> -	 */
> -	if (page_private(page)) {
> -		set_page_private(newpage, page_private(page));
> -		set_page_private(page, 0);
> +	if (hugetlb_page_subpool(page)) {
> +		hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
> +		hugetlb_set_page_subpool(page, NULL);
>  	}
>  
>  	if (mode != MIGRATE_SYNC_NO_COPY)
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ef5b144b8aac..be71a00ee2a0 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -472,6 +472,64 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>  					unsigned long flags);
>  #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
>  
> +/*
> + * hugetlb page specific state flags.  These flags are located in page.private
> + * of the hugetlb head page.  Functions created via the below macros should be
> + * used to manipulate these flags.
> + *
> + * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
> + *	allocation time.  Cleared when page is fully instantiated.  Free
> + *	routine checks flag to restore a reservation on error paths.
> + */
> +enum hugetlb_page_flags {
> +	HPG_restore_reserve = 0,
> +	__NR_HPAGEFLAGS,
> +};
> +
> +/*
> + * Macros to create test, set and clear function definitions for
> + * hugetlb specific page flags.
> + */
> +#ifdef CONFIG_HUGETLB_PAGE
> +#define TESTHPAGEFLAG(uname, flname)				\
> +static inline int HPage##uname(struct page *page)		\
> +	{ BUILD_BUG_ON(sizeof_field(struct page, private) *	\
> +			BITS_PER_BYTE < __NR_HPAGEFLAGS);	\
> +	return test_bit(HPG_##flname, &(page->private)); }
> +
> +#define SETHPAGEFLAG(uname, flname)				\
> +static inline void SetHPage##uname(struct page *page)		\
> +	{ set_bit(HPG_##flname, &(page->private)); }
> +
> +#define CLEARHPAGEFLAG(uname, flname)				\
> +static inline void ClearHPage##uname(struct page *page)		\
> +	{ clear_bit(HPG_##flname, &(page->private)); }
> +#else
> +#define TESTHPAGEFLAG(uname, flname)				\
> +static inline int HPage##uname(struct page *page)		\
> +	{ BUILD_BUG_ON(sizeof_field(struct page, private) *	\
> +			BITS_PER_BYTE < __NR_HPAGEFLAGS);	\
> +	return 0; }
> +
> +#define SETHPAGEFLAG(uname, flname)				\
> +static inline void SetHPage##uname(struct page *page)		\
> +	{ }
> +
> +#define CLEARHPAGEFLAG(uname, flname)				\
> +static inline void ClearHPage##uname(struct page *page)		\
> +	{ }
> +#endif
> +
> +#define HPAGEFLAG(uname, flname)				\
> +	TESTHPAGEFLAG(uname, flname)				\
> +	SETHPAGEFLAG(uname, flname)				\
> +	CLEARHPAGEFLAG(uname, flname)				\
> +
> +/*
> + * Create functions associated with hugetlb page flags
> + */
> +HPAGEFLAG(RestoreReserve, restore_reserve)
> +
>  #ifdef CONFIG_HUGETLB_PAGE
>  
>  #define HSTATE_NAME_LEN 32
> @@ -531,6 +589,20 @@ extern unsigned int default_hstate_idx;
>  
>  #define default_hstate (hstates[default_hstate_idx])
>  
> +/*
> + * hugetlb page subpool pointer located in hpage[1].private
> + */
> +static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
> +{
> +	return (struct hugepage_subpool *)(hpage+1)->private;
> +}
> +
> +static inline void hugetlb_set_page_subpool(struct page *hpage,
> +					struct hugepage_subpool *subpool)
> +{
> +	set_page_private(hpage+1, (unsigned long)subpool);
> +}
> +
>  static inline struct hstate *hstate_file(struct file *f)
>  {
>  	return hstate_inode(file_inode(f));
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 737b2dce19e6..8bed6b5202d2 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1133,7 +1133,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>  	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
>  	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
>  	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> -		SetPagePrivate(page);
> +		SetHPageRestoreReserve(page);
>  		h->resv_huge_pages--;
>  	}
>  
> @@ -1407,20 +1407,19 @@ static void __free_huge_page(struct page *page)
>  	 */
>  	struct hstate *h = page_hstate(page);
>  	int nid = page_to_nid(page);
> -	struct hugepage_subpool *spool =
> -		(struct hugepage_subpool *)page_private(page);
> +	struct hugepage_subpool *spool = hugetlb_page_subpool(page);
>  	bool restore_reserve;
>  
>  	VM_BUG_ON_PAGE(page_count(page), page);
>  	VM_BUG_ON_PAGE(page_mapcount(page), page);
>  
> -	set_page_private(page, 0);
> +	hugetlb_set_page_subpool(page, NULL);
>  	page->mapping = NULL;
> -	restore_reserve = PagePrivate(page);
> -	ClearPagePrivate(page);
> +	restore_reserve = HPageRestoreReserve(page);
> +	ClearHPageRestoreReserve(page);
>  
>  	/*
> -	 * If PagePrivate() was set on page, page allocation consumed a
> +	 * If HPageRestoreReserve was set on page, page allocation consumed a
>  	 * reservation.  If the page was associated with a subpool, there
>  	 * would have been a page reserved in the subpool before allocation
>  	 * via hugepage_subpool_get_pages().  Since we are 'restoring' the
> @@ -2254,24 +2253,24 @@ static long vma_add_reservation(struct hstate *h,
>   * This routine is called to restore a reservation on error paths.  In the
>   * specific error paths, a huge page was allocated (via alloc_huge_page)
>   * and is about to be freed.  If a reservation for the page existed,
> - * alloc_huge_page would have consumed the reservation and set PagePrivate
> - * in the newly allocated page.  When the page is freed via free_huge_page,
> - * the global reservation count will be incremented if PagePrivate is set.
> - * However, free_huge_page can not adjust the reserve map.  Adjust the
> - * reserve map here to be consistent with global reserve count adjustments
> - * to be made by free_huge_page.
> + * alloc_huge_page would have consumed the reservation and set
> + * HPageRestoreReserve in the newly allocated page.  When the page is freed
> + * via free_huge_page, the global reservation count will be incremented if
> + * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
> + * reserve map.  Adjust the reserve map here to be consistent with global
> + * reserve count adjustments to be made by free_huge_page.
>   */
>  static void restore_reserve_on_error(struct hstate *h,
>  			struct vm_area_struct *vma, unsigned long address,
>  			struct page *page)
>  {
> -	if (unlikely(PagePrivate(page))) {
> +	if (unlikely(HPageRestoreReserve(page))) {
>  		long rc = vma_needs_reservation(h, vma, address);
>  
>  		if (unlikely(rc < 0)) {
>  			/*
>  			 * Rare out of memory condition in reserve map
> -			 * manipulation.  Clear PagePrivate so that
> +			 * manipulation.  Clear HPageRestoreReserve so that
>  			 * global reserve count will not be incremented
>  			 * by free_huge_page.  This will make it appear
>  			 * as though the reservation for this page was
> @@ -2280,7 +2279,7 @@ static void restore_reserve_on_error(struct hstate *h,
>  			 * is better than inconsistent global huge page
>  			 * accounting of reserve counts.
>  			 */
> -			ClearPagePrivate(page);
> +			ClearHPageRestoreReserve(page);
>  		} else if (rc) {
>  			rc = vma_add_reservation(h, vma, address);
>  			if (unlikely(rc < 0))
> @@ -2288,7 +2287,7 @@ static void restore_reserve_on_error(struct hstate *h,
>  				 * See above comment about rare out of
>  				 * memory condition.
>  				 */
> -				ClearPagePrivate(page);
> +				ClearHPageRestoreReserve(page);
>  		} else
>  			vma_end_reservation(h, vma, address);
>  	}
> @@ -2369,7 +2368,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>  		if (!page)
>  			goto out_uncharge_cgroup;
>  		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
> -			SetPagePrivate(page);
> +			SetHPageRestoreReserve(page);
>  			h->resv_huge_pages--;
>  		}
>  		spin_lock(&hugetlb_lock);
> @@ -2387,7 +2386,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>  
>  	spin_unlock(&hugetlb_lock);
>  
> -	set_page_private(page, (unsigned long)spool);
> +	hugetlb_set_page_subpool(page, spool);
>  
>  	map_commit = vma_commit_reservation(h, vma, addr);
>  	if (unlikely(map_chg > map_commit)) {
> @@ -4212,7 +4211,7 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
>  	spin_lock(ptl);
>  	ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
>  	if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
> -		ClearPagePrivate(new_page);
> +		ClearHPageRestoreReserve(new_page);
>  
>  		/* Break COW */
>  		huge_ptep_clear_flush(vma, haddr, ptep);
> @@ -4279,7 +4278,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
>  
>  	if (err)
>  		return err;
> -	ClearPagePrivate(page);
> +	ClearHPageRestoreReserve(page);
>  
>  	/*
>  	 * set page dirty so that it will not be removed from cache/file
> @@ -4441,7 +4440,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
>  		goto backout;
>  
>  	if (anon_rmap) {
> -		ClearPagePrivate(page);
> +		ClearHPageRestoreReserve(page);
>  		hugepage_add_new_anon_rmap(page, vma, haddr);
>  	} else
>  		page_dup_rmap(page, true);
> @@ -4755,7 +4754,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>  	if (vm_shared) {
>  		page_dup_rmap(page, true);
>  	} else {
> -		ClearPagePrivate(page);
> +		ClearHPageRestoreReserve(page);
>  		hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
>  	}
>  
> 

Looks good to me. Thanks.
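
One minor note for my own understanding (nothing beyond what the patch
already does): the flag bits take over page.private of the head page, while
the subpool pointer moves to page[1].private, so the new layout is roughly:

/*
 * hugetlb compound page after this patch:
 *   page[0].private - hugetlb page flag bits (e.g. HPG_restore_reserve),
 *                     manipulated via the generated HPage* helpers
 *   page[1].private - struct hugepage_subpool *, accessed via
 *                     hugetlb_page_subpool()/hugetlb_set_page_subpool()
 */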

Reviewed-by: Miaohe Lin <linmiaohe@...wei.com>
