lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 10 Feb 2021 17:16:29 -0800
From:   Mike Kravetz <mike.kravetz@...cle.com>
To:     Oscar Salvador <osalvador@...e.de>
Cc:     David Hildenbrand <david@...hat.com>,
        Muchun Song <songmuchun@...edance.com>,
        Michal Hocko <mhocko@...nel.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 2/2] mm,page_alloc: Make alloc_contig_range handle
 free hugetlb pages

On 2/8/21 2:38 AM, Oscar Salvador wrote:
> Free hugetlb pages are trickier to handle: in order to guarantee
> no userspace application disruption, we need to replace the
> current free hugepage with a new one.
> 
> In order to do that, a new function called alloc_and_dissolve_huge_page
> is introduced.
> This function will first try to get a new fresh hugetlb page, and if it
> succeeds, it will dissolve the old one.
> 
> Signed-off-by: Oscar Salvador <osalvador@...e.de>
> ---
>  include/linux/hugetlb.h |  6 ++++++
>  mm/compaction.c         | 11 +++++++++++
>  mm/hugetlb.c            | 35 +++++++++++++++++++++++++++++++++++
>  3 files changed, 52 insertions(+)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ebca2ef02212..f81afcb86e89 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -505,6 +505,7 @@ struct huge_bootmem_page {
>  	struct hstate *hstate;
>  };
>  
> +bool alloc_and_dissolve_huge_page(struct page *page);
>  struct page *alloc_huge_page(struct vm_area_struct *vma,
>  				unsigned long addr, int avoid_reserve);
>  struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> @@ -773,6 +774,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
>  #else	/* CONFIG_HUGETLB_PAGE */
>  struct hstate {};
>  
> +static inline bool alloc_and_dissolve_huge_page(struct page *page)
> +{
> +	return false;
> +}
> +
>  static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
>  					   unsigned long addr,
>  					   int avoid_reserve)
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 89cd2e60da29..7969ddc10856 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -952,6 +952,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
>  					low_pfn += compound_nr(page) - 1;
>  					goto isolate_success_no_list;
>  				}
> +			} else {
> +				/*
> +				 * Free hugetlb page. Allocate a new one and
> +				 * dissolve this one if it succeeds.
> +				 */
> +				if (alloc_and_dissolve_huge_page(page)) {
> +					unsigned long order = buddy_order_unsafe(page);
> +
> +					low_pfn += (1UL << order) - 1;
> +					continue;
> +				}
>  			}
>  			goto isolate_fail;
>  		}
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 18f6ee317900..79ffbb64c4ee 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2253,6 +2253,41 @@ static void restore_reserve_on_error(struct hstate *h,
>  	}
>  }
>  
> +bool alloc_and_dissolve_huge_page(struct page *page)
> +{
> +	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL);
> +	struct page *head;
> +	struct hstate *h;
> +	bool ret = false;
> +	int nid;
> +
> +	if (!nodes_allowed)
> +		return ret;
> +
> +	spin_lock(&hugetlb_lock);
> +	head = compound_head(page);
> +	h = page_hstate(head);
> +	nid = page_to_nid(head);
> +	spin_unlock(&hugetlb_lock);
> +
> +	init_nodemask_of_node(nodes_allowed, nid);
> +
> +	/*
> +	 * Before dissolving the page, we need to allocate a new one,
> +	 * so the pool remains stable.
> +	 */
> +	if (alloc_pool_huge_page(h, nodes_allowed, NULL)) {
> +		/*
> +		 * Ok, we have a free hugetlb-page to replace this
> +		 * one. Dissolve the old page.
> +		 */
> +		if (!dissolve_free_huge_page(page))
> +			ret = true;

Should probably check for -EBUSY as this means someone started using
the page while we were allocating a new one.  It would complicate the
code to try and do the 'right thing'.  The right thing might be
dissolving the new pool page and then trying to isolate this in-use
page.  Of course things could change again while you are doing that. :(

Might be good enough as is.  As noted previously, this code is racy.
-- 
Mike Kravetz

> +	}
> +
> +	return ret;
> +}
> +
>  struct page *alloc_huge_page(struct vm_area_struct *vma,
>  				    unsigned long addr, int avoid_reserve)
>  {
> 

Powered by blists - more mailing lists