Message-ID: <db4538c3-84f8-4fb4-8307-b4fcf46ebe38@gmail.com>
Date: Sat, 6 Dec 2025 17:03:25 +0000
From: Usama Arif <usamaarif642@...il.com>
To: Kiryl Shutsemau <kas@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Muchun Song <muchun.song@...ux.dev>
Cc: David Hildenbrand <david@...nel.org>, Oscar Salvador <osalvador@...e.de>,
Mike Rapoport <rppt@...nel.org>, Vlastimil Babka <vbabka@...e.cz>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Matthew Wilcox <willy@...radead.org>, Zi Yan <ziy@...dia.com>,
Baoquan He <bhe@...hat.com>, Michal Hocko <mhocko@...e.com>,
Johannes Weiner <hannes@...xchg.org>, Jonathan Corbet <corbet@....net>,
kernel-team@...a.com, linux-mm@...ck.org, linux-kernel@...r.kernel.org,
linux-doc@...r.kernel.org
Subject: Re: [PATCH 06/11] mm/hugetlb: Remove fake head pages
On 05/12/2025 19:43, Kiryl Shutsemau wrote:
> HugeTLB optimizes vmemmap memory usage by freeing all but the first page
> of vmemmap memory for the huge page and remapping the rest of the pages
> to the first one.
>
> This only occurs if the size of the struct page is a power of 2. In
> these instances, the compound head position encoding in the tail pages
> ensures that all tail pages of the same order are identical, regardless
> of the page to which they belong.
>
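IIUC from the earlier patches in the series, the head can now be
recovered purely from the tail's own vmemmap address, along the lines
of the sketch below (hypothetical illustration only, head_from_tail()
is not a real helper in this series):

	/*
	 * With sizeof(struct page) a power of 2 and huge pages
	 * naturally aligned to their order, a folio's struct pages
	 * occupy an aligned block of vmemmap, so aligning a tail's
	 * address down recovers the head.
	 */
	static struct page *head_from_tail(struct page *tail,
					   unsigned int order)
	{
		unsigned long addr = (unsigned long)tail;

		return (struct page *)(addr &
				~((sizeof(struct page) << order) - 1));
	}

Since nothing position-dependent is stored in the tails any more,
every tail of a given order is bit-identical, which is what makes a
single shared page of tails per hstate possible.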
> This allows the fake head pages to be eliminated without significant
> memory overhead: a page full of tail struct pages is allocated per
> hstate, and the tail vmemmap pages of every huge page of that hstate
> are remapped to it instead of to the page containing the head page.
>
> Signed-off-by: Kiryl Shutsemau <kas@...nel.org>
> ---
> include/linux/hugetlb.h | 3 +++
> mm/hugetlb_vmemmap.c | 31 +++++++++++++++++++++++++++----
> mm/hugetlb_vmemmap.h | 4 ++--
> 3 files changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 8e63e46b8e1f..75dd940fda22 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -676,6 +676,9 @@ struct hstate {
> unsigned int free_huge_pages_node[MAX_NUMNODES];
> unsigned int surplus_huge_pages_node[MAX_NUMNODES];
> char name[HSTATE_NAME_LEN];
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> + struct page *vmemmap_tail;
> +#endif
> };
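Just to note the cost of this: it is one extra page per hstate, allocated
lazily when the first folio of that size is optimized, so e.g. a typical
x86-64 setup with 2M and 1G hstates pays 2 * 4096 = 8192 bytes in total,
which is negligible next to the vmemmap savings.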
>
> struct cma;
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index f5ee499b8563..2543bdbcae20 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -18,6 +18,7 @@
> #include <asm/pgalloc.h>
> #include <asm/tlbflush.h>
> #include "hugetlb_vmemmap.h"
> +#include "internal.h"
>
> /**
> * struct vmemmap_remap_walk - walk vmemmap page table
> @@ -518,7 +519,24 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
> return true;
> }
>
> -static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
> +static void hugetlb_vmemmap_tail_alloc(struct hstate *h)
> +{
> + struct page *p;
> +
> + if (h->vmemmap_tail)
> + return;
> +
The above check looks unnecessary, since __hugetlb_vmemmap_optimize_folio()
already tests !h->vmemmap_tail before calling this?

Is it possible that we have a race here? If two threads are each trying
to allocate a hugetlb page while none exist in the system, both could
see h->vmemmap_tail == NULL, and both would call alloc_page() and set
h->vmemmap_tail, leaking one of the two pages.

Also, is there a good point at which we can notice that
hstate->nr_huge_pages has dropped to 0 and free h->vmemmap_tail? It's
only a single page per hstate, so not a big deal, but it would be nice
to have cleanup for it?
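If that race is a real concern, something like the below (untested
sketch, reusing the patch's own initialisation loop) would close it
without taking a new lock:

	static void hugetlb_vmemmap_tail_alloc(struct hstate *h)
	{
		struct page *page, *p;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return;

		p = page_to_virt(page);
		for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
			prep_compound_tail(p + i, p, huge_page_order(h));

		/* Publish only a fully initialised page; drop ours on a lost race. */
		if (cmpxchg(&h->vmemmap_tail, NULL, page))
			__free_page(page);
	}

The reader side would then probably want a READ_ONCE() (or
smp_load_acquire()) when testing h->vmemmap_tail; cmpxchg() being fully
ordered means the page contents are published before the pointer is.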
> + h->vmemmap_tail = alloc_page(GFP_KERNEL | __GFP_ZERO);
> + if (!h->vmemmap_tail)
> + return;
> +
> + p = page_to_virt(h->vmemmap_tail);
> +
> + for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
> + prep_compound_tail(p + i, p, huge_page_order(h));
> +}
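For the common 4K PAGE_SIZE / 64-byte struct page case, that loop fills
in

	PAGE_SIZE / sizeof(struct page) = 4096 / 64 = 64

tail entries, i.e. the shared page is a valid image for any tail
vmemmap page it later gets mapped over (other configurations only
change the count, as long as sizeof(struct page) stays a power of 2).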
> +
> +static int __hugetlb_vmemmap_optimize_folio(struct hstate *h,
> struct folio *folio,
> struct list_head *vmemmap_pages,
> unsigned long flags)
> @@ -533,6 +551,11 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
> if (!vmemmap_should_optimize_folio(h, folio))
> return ret;
>
> + if (!h->vmemmap_tail)
> + hugetlb_vmemmap_tail_alloc(h);
> + if (!h->vmemmap_tail)
> + return -ENOMEM;
> +
> static_branch_inc(&hugetlb_optimize_vmemmap_key);
>
> if (flags & VMEMMAP_SYNCHRONIZE_RCU)
> @@ -562,7 +585,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
> list_add(&vmemmap_head->lru, vmemmap_pages);
> memmap_pages_add(1);
>
> - vmemmap_tail = vmemmap_head;
> + vmemmap_tail = h->vmemmap_tail;
> vmemmap_start = (unsigned long)folio;
> vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h);
>
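To spell out the remap-target change for other readers: before this
patch a folio's tail vmemmap pages were all remapped to the folio's own
first vmemmap page (hence the fake heads in its tail slots), while
after it they are remapped to the shared per-hstate page:

	before: tail vmemmap PTEs -> vmemmap_head (this folio's first page)
	after:  tail vmemmap PTEs -> h->vmemmap_tail (shared all-tails page)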
> @@ -594,7 +617,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
> * can use folio_test_hugetlb_vmemmap_optimized(@folio) to detect if @folio's
> * vmemmap pages have been optimized.
> */
> -void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
> +void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio)
> {
> LIST_HEAD(vmemmap_pages);
>
> @@ -868,7 +891,7 @@ static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
>
> static int __init hugetlb_vmemmap_init(void)
> {
> - const struct hstate *h;
> + struct hstate *h;
>
> /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
> BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 18b490825215..f44e40c44a21 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -24,7 +24,7 @@ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio);
> long hugetlb_vmemmap_restore_folios(const struct hstate *h,
> struct list_head *folio_list,
> struct list_head *non_hvo_folios);
> -void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
> +void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio);
> void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
> void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
> #ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
> @@ -64,7 +64,7 @@ static inline long hugetlb_vmemmap_restore_folios(const struct hstate *h,
> return 0;
> }
>
> -static inline void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
> +static inline void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio)
> {
> }
>