[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e3aac0fc-458f-4453-86a6-1bf92dc5fbd0@linux.alibaba.com>
Date: Mon, 26 Feb 2024 17:59:37 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Oscar Salvador <osalvador@...e.de>
Cc: akpm@...ux-foundation.org, muchun.song@...ux.dev, david@...hat.com,
linmiaohe@...wei.com, naoya.horiguchi@....com, mhocko@...nel.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 2/3] mm: hugetlb: make the hugetlb migration strategy
consistent
On 2024/2/26 17:17, Oscar Salvador wrote:
> On Mon, Feb 26, 2024 at 11:34:51AM +0800, Baolin Wang wrote:
>> IMO, I'm not sure whether it's appropriate to decouple
>> dequeue_hugetlb_folio_nodemask() from alloc_hugetlb_folio_nodemask() into
>> two separate functions for the users to call, because these details should
>> be hidden within the hugetlb core implementation.
>>
>> Instead, I can move the gfp_mask fiddling into a new helper, and move the
>> helper into alloc_migrate_hugetlb_folio(). Temporary hugetlb allocation has
>> its own gfp strategy seems reasonable to me.
>
> An alternative would be to do the following, which does not futher carry
> the "reason" argument into hugetlb code.
> (Not even compile tested, just a PoC)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index c1ee640d87b1..8a89a1007dcb 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -970,6 +970,24 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
> return modified_mask;
> }
>
> +static inline bool htlb_allow_fallback(int reason)
> +{
> + bool allowed_fallback = false;
> +
> + switch (reason) {
> + case MR_MEMORY_HOTPLUG:
> + case MR_MEMORY_FAILURE:
> + case MR_SYSCALL:
> + case MR_MEMPOLICY_MBIND:
> + allowed_fallback = true;
> + break;
> + default:
> + break;
> + }
> +
> + return allowed_fallback;
> +}
> +
Thanks for providing an alternative implementation. However, I still
prefer to hide these details inside the hugetlb core, since users do not
need to pay excessive attention to these hugetlb details. So something like:
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 418d66953224..e8eb08bbc688 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2567,13 +2567,38 @@ static struct folio
*alloc_surplus_hugetlb_folio(struct hstate *h,
}
static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h,
gfp_t gfp_mask,
- int nid, nodemask_t *nmask)
+ int nid, nodemask_t *nmask, int reason)
{
struct folio *folio;
+ bool allowed_fallback = false;
if (hstate_is_gigantic(h))
return NULL;
+ if (gfp_mask & __GFP_THISNODE)
+ goto alloc_new;
+
+ /*
+ * Note: the memory offline, memory failure and migration
syscalls will
+ * be allowed to fall back to other nodes due to lack of a better
choice,
+ * which might break the per-node hugetlb pool. While other cases
will
+ * set the __GFP_THISNODE to avoid breaking the per-node hugetlb
pool.
+ */
+ switch (reason) {
+ case MR_MEMORY_HOTPLUG:
+ case MR_MEMORY_FAILURE:
+ case MR_SYSCALL:
+ case MR_MEMPOLICY_MBIND:
+ allowed_fallback = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!allowed_fallback)
+ gfp_mask |= __GFP_THISNODE;
+
+alloc_new:
folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
if (!folio)
return NULL;
@@ -2621,7 +2646,7 @@ struct folio
*alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
/* folio migration callback function */
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int
preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask)
+ nodemask_t *nmask, gfp_t gfp_mask, int reason)
{
spin_lock_irq(&hugetlb_lock);
if (available_huge_pages(h)) {
@@ -2636,7 +2661,7 @@ struct folio *alloc_hugetlb_folio_nodemask(struct
hstate *h, int preferred_nid,
}
spin_unlock_irq(&hugetlb_lock);
- return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid,
nmask);
+ return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid,
nmask, reason);
}
/*
@@ -6653,7 +6678,13 @@ static struct folio
*alloc_hugetlb_folio_vma(struct hstate *h,
gfp_mask = htlb_alloc_mask(h);
node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask);
+ /*
+ * This is used to allocate a temporary hugetlb to hold the copied
+ * content, which will then be copied again to the final hugetlb
+ * consuming a reservation. Set the migrate reason to -1 to indicate
+ * that breaking the per-node hugetlb pool is not allowed in
this case.
+ */
+ folio = alloc_hugetlb_folio_nodemask(h, node, nodemask,
gfp_mask, -1);
mpol_cond_put(mpol);
return folio;
What do you think? Thanks.
Powered by blists - more mailing lists