Date:   Thu, 22 Sep 2022 15:41:36 -0700
From:   Mike Kravetz <mike.kravetz@...cle.com>
To:     Doug Berger <opendmb@...il.com>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Muchun Song <songmuchun@...edance.com>,
        Oscar Salvador <osalvador@...e.de>,
        Michal Hocko <mhocko@...e.com>,
        David Hildenbrand <david@...hat.com>,
        Florian Fainelli <f.fainelli@...il.com>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH 0/3] mm/hugetlb: hugepage migration enhancements

On 09/22/22 13:25, Mike Kravetz wrote:
> On 09/21/22 15:36, Doug Berger wrote:
> 
> As noted above, for pages to be migrated we first try to use an existing
> free huge page as the target.  Quite some time ago, Michal added code to
> allocate a new page from buddy as the target if no free huge pages were
> available.  This change also included a special flag to dissolve the
> source huge page when it is freed.  It seems like this is the exact
> behavior we want here?  I wonder if it might be easier just to use this
> existing code?
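
For reference, the dissolve-on-free mechanism I was referring to above
works roughly like the sketch below.  This is paraphrased from memory
rather than the literal mm/hugetlb.c code, and the *_sketch names are
made up for illustration.

static void move_hugetlb_state_sketch(struct page *oldpage,
				      struct page *newpage)
{
	if (HPageTemporary(newpage)) {
		/* the fresh target becomes a normal hugetlb page ... */
		ClearHPageTemporary(newpage);
		/* ... and the source page is dissolved when freed */
		SetHPageTemporary(oldpage);
	}
}

static void free_huge_page_sketch(struct hstate *h, struct page *page)
{
	if (HPageTemporary(page)) {
		/* drop hugetlb accounting and release the page to buddy */
		remove_hugetlb_page(h, page, false);
		update_and_free_page(h, page, true);
	} else {
		/* normal case: return the page to the hstate free list */
		enqueue_huge_page(h, page);
	}
}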

Totally untested, but I believe the patch below would accomplish this.

From aa8fc11bb67bc9e67e3b6b280fab339afce37759 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@...cle.com>
Date: Thu, 22 Sep 2022 15:32:10 -0700
Subject: [PATCH] hugetlb: force alloc_contig_range hugetlb migrations to
 allocate new pages

When migrating hugetlb pages as the result of an alloc_contig_range
operation, allocate a new page from buddy for the migration target.
This guarantees that the number of hugetlb pages is not decreased by
the operation.  In addition, this will result in the special HPageTemporary
flag being set on the source page so that the source page is dissolved
when it is freed.

Signed-off-by: Mike Kravetz <mike.kravetz@...cle.com>
---
 include/linux/hugetlb.h |  5 +++--
 mm/hugetlb.c            | 12 ++++++++++--
 mm/internal.h           |  1 +
 mm/migrate.c            |  3 ++-
 mm/page_alloc.c         |  1 +
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cfe15b32e2d4..558831bf1087 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -702,7 +702,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-				nodemask_t *nmask, gfp_t gfp_mask);
+				nodemask_t *nmask, gfp_t gfp_mask,
+				bool temporary);
 struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
 int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
@@ -1003,7 +1004,7 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
 
 static inline struct page *
 alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-			nodemask_t *nmask, gfp_t gfp_mask)
+			nodemask_t *nmask, gfp_t gfp_mask, bool temporary)
 {
 	return NULL;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8bcaf66defc5..19de8ae79ec8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2308,8 +2308,11 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 
 /* page migration callback function */
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-		nodemask_t *nmask, gfp_t gfp_mask)
+		nodemask_t *nmask, gfp_t gfp_mask, bool temporary)
 {
+	if (temporary)
+		goto temporary_alloc;
+
 	spin_lock_irq(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0) {
 		struct page *page;
@@ -2322,6 +2325,11 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 	}
 	spin_unlock_irq(&hugetlb_lock);
 
+temporary_alloc:
+	/*
+	 * Try to allocate a fresh page with the special HPageTemporary
+	 * characteristics.
+	 */
 	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
 }
 
@@ -2337,7 +2345,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 
 	gfp_mask = htlb_alloc_mask(h);
 	node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask);
+	page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask, false);
 	mpol_cond_put(mpol);
 
 	return page;
diff --git a/mm/internal.h b/mm/internal.h
index b3002e03c28f..3ebf8885e24f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -800,6 +800,7 @@ struct migration_target_control {
 	int nid;		/* preferred node id */
 	nodemask_t *nmask;
 	gfp_t gfp_mask;
+	bool alloc_contig;	/* alloc_contig_range allocation */
 };
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index c228afba0963..6505ba2070d7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1610,7 +1610,8 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
 		struct hstate *h = page_hstate(&folio->page);
 
 		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
-		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
+		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask,
+						mtc->alloc_contig);
 	}
 
 	if (folio_test_large(folio)) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d7b20bf09c1c..2b8a5a2b51cd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -9166,6 +9166,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 	struct migration_target_control mtc = {
 		.nid = zone_to_nid(cc->zone),
 		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+		.alloc_contig = true,
 	};
 
 	lru_cache_disable();
-- 
2.37.3
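
To spell out how a caller opts in to the new behavior (purely
illustrative, not part of the patch): a migration path that wants the
"always allocate fresh, dissolve the source on free" semantics would set
the new field in its migration_target_control, roughly like this,
assuming nid/nodemask/pagelist are already set up:

	struct migration_target_control mtc = {
		.nid = nid,
		.nmask = &nodemask,
		.gfp_mask = GFP_HIGHUSER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
		.alloc_contig = true,	/* force fresh, temporary huge pages */
	};

	ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
			    (unsigned long)&mtc, MIGRATE_SYNC,
			    MR_CONTIG_RANGE, NULL);

Every other caller leaves the field zero, so existing behavior is
unchanged; only the alloc_contig_range() path in the patch sets it.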
