lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20230914150348.GA129171@monkey>
Date:   Thu, 14 Sep 2023 08:03:48 -0700
From:   Mike Kravetz <mike.kravetz@...cle.com>
To:     Usama Arif <usama.arif@...edance.com>
Cc:     linux-mm@...ck.org, muchun.song@...ux.dev, rppt@...nel.org,
        linux-kernel@...r.kernel.org, songmuchun@...edance.com,
        fam.zheng@...edance.com, liangma@...ngbit.com,
        punit.agrawal@...edance.com
Subject: Re: [v5 4/4] mm: hugetlb: Skip initialization of gigantic tail
 struct pages if freed by HVO

On 09/13/23 11:54, Usama Arif wrote:
> The new boot flow when it comes to initialization of gigantic pages
> is as follows:
> - At boot time, for a gigantic page during __alloc_bootmem_hugepage,
> the region after the first struct page is marked as noinit.
> - This results in only the first struct page to be
> initialized in reserve_bootmem_region. As the tail struct pages are
> not initialized at this point, there can be a significant saving
> in boot time if HVO succeeds later on.
> - Later on in the boot, the head page is prepped and the first
> HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages
> are initialized.
> - HVO is attempted. If it is not successful, then the rest of the
> tail struct pages are initialized. If it is successful, no more
> tail struct pages need to be initialized saving significant boot time.
> 
> The WARN_ON for increased ref count in gather_bootmem_prealloc was changed
> to a VM_BUG_ON.  This is OK as there should be no speculative references
> this early in boot process. The VM_BUG_ON's are there just in case such code
> is introduced.
> 
> Signed-off-by: Usama Arif <usama.arif@...edance.com>
> ---
>  mm/hugetlb.c         | 63 +++++++++++++++++++++++++++++++++++++-------
>  mm/hugetlb_vmemmap.c |  2 +-
>  mm/hugetlb_vmemmap.h |  9 ++++---
>  mm/internal.h        |  3 +++
>  mm/mm_init.c         |  2 +-
>  5 files changed, 64 insertions(+), 15 deletions(-)

Thank you con continued changes.  Code looks good,

Reviewed-by: Mike Kravetz <mike.kravetz@...cle.com>
-- 
Mike Kravetz

> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index c32ca241df4b..ed37c6e4e952 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3169,6 +3169,15 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
>  	}
>  
>  found:
> +
> +	/*
> +	 * Only initialize the head struct page in memmap_init_reserved_pages,
> +	 * rest of the struct pages will be initialized by the HugeTLB subsystem itself.
> +	 * The head struct page is used to get folio information by the HugeTLB
> +	 * subsystem like zone id and node id.
> +	 */
> +	memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
> +		huge_page_size(h) - PAGE_SIZE);
>  	/* Put them into a private list first because mem_map is not up yet */
>  	INIT_LIST_HEAD(&m->list);
>  	list_add(&m->list, &huge_boot_pages);
> @@ -3176,6 +3185,42 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
>  	return 1;
>  }
>  
> +/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
> +static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
> +						    unsigned long start_page_number,
> +						    unsigned long end_page_number)
> +{
> +	enum zone_type zone = zone_idx(folio_zone(folio));
> +	int nid = folio_nid(folio);
> +	unsigned long head_pfn = folio_pfn(folio);
> +	unsigned long pfn, end_pfn = head_pfn + end_page_number;
> +	int ret;
> +
> +	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
> +		struct page *page = pfn_to_page(pfn);
> +
> +		__init_single_page(page, pfn, zone, nid);
> +		prep_compound_tail((struct page *)folio, pfn - head_pfn);
> +		ret = page_ref_freeze(page, 1);
> +		VM_BUG_ON(!ret);
> +	}
> +}
> +
> +static void __init hugetlb_folio_init_vmemmap(struct folio *folio, struct hstate *h,
> +					       unsigned long nr_pages)
> +{
> +	int ret;
> +
> +	/* Prepare folio head */
> +	__folio_clear_reserved(folio);
> +	__folio_set_head(folio);
> +	ret = page_ref_freeze(&folio->page, 1);
> +	VM_BUG_ON(!ret);
> +	/* Initialize the necessary tail struct pages */
> +	hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
> +	prep_compound_head((struct page *)folio, huge_page_order(h));
> +}
> +
>  /*
>   * Put bootmem huge pages into the standard lists after mem_map is up.
>   * Note: This only applies to gigantic (order > MAX_ORDER) pages.
> @@ -3186,19 +3231,19 @@ static void __init gather_bootmem_prealloc(void)
>  
>  	list_for_each_entry(m, &huge_boot_pages, list) {
>  		struct page *page = virt_to_page(m);
> -		struct folio *folio = page_folio(page);
> +		struct folio *folio = (void *)page;
>  		struct hstate *h = m->hstate;
>  
>  		VM_BUG_ON(!hstate_is_gigantic(h));
>  		WARN_ON(folio_ref_count(folio) != 1);
> -		if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
> -			WARN_ON(folio_test_reserved(folio));
> -			prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> -			free_huge_folio(folio); /* add to the hugepage allocator */
> -		} else {
> -			/* VERY unlikely inflated ref count on a tail page */
> -			free_gigantic_folio(folio, huge_page_order(h));
> -		}
> +
> +		hugetlb_folio_init_vmemmap(folio, h, HUGETLB_VMEMMAP_RESERVE_PAGES);
> +		prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> +		/* If HVO fails, initialize all tail struct pages */
> +		if (!HPageVmemmapOptimized(&folio->page))
> +			hugetlb_folio_init_tail_vmemmap(folio, HUGETLB_VMEMMAP_RESERVE_PAGES,
> +							pages_per_huge_page(h));
> +		free_huge_folio(folio); /* add to the hugepage allocator */
>  
>  		/*
>  		 * We need to restore the 'stolen' pages to totalram_pages
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 3cdb38d87a95..772a877918d7 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -589,7 +589,7 @@ static int __init hugetlb_vmemmap_init(void)
>  	const struct hstate *h;
>  
>  	/* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
> -	BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
> +	BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
>  
>  	for_each_hstate(h) {
>  		if (hugetlb_vmemmap_optimizable(h)) {
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 25bd0e002431..4573899855d7 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -10,15 +10,16 @@
>  #define _LINUX_HUGETLB_VMEMMAP_H
>  #include <linux/hugetlb.h>
>  
> -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> -
>  /*
>   * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
>   * Documentation/vm/vmemmap_dedup.rst.
>   */
>  #define HUGETLB_VMEMMAP_RESERVE_SIZE	PAGE_SIZE
> +#define HUGETLB_VMEMMAP_RESERVE_PAGES	(HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page))
> +
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> +void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
>  
>  static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
>  {
> diff --git a/mm/internal.h b/mm/internal.h
> index d1d4bf4e63c0..d74061aa6de7 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1154,4 +1154,7 @@ struct vma_prepare {
>  	struct vm_area_struct *remove;
>  	struct vm_area_struct *remove2;
>  };
> +
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
> +				unsigned long zone, int nid);
>  #endif	/* __MM_INTERNAL_H */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 50f2f34745af..fed4370b02e1 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -555,7 +555,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>  	node_states[N_MEMORY] = saved_node_state;
>  }
>  
> -static void __meminit __init_single_page(struct page *page, unsigned long pfn,
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
>  				unsigned long zone, int nid)
>  {
>  	mm_zero_struct_page(page);
> -- 
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ