[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <166E73A3-02CE-4023-AE0E-022C9C618C33@nvidia.com>
Date: Mon, 19 May 2025 11:08:59 -0400
From: Zi Yan <ziy@...dia.com>
To: David Hildenbrand <david@...hat.com>
Cc: Oscar Salvador <osalvador@...e.de>, Johannes Weiner <hannes@...xchg.org>,
linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Vlastimil Babka <vbabka@...e.cz>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
Mel Gorman <mgorman@...hsingularity.net>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Brendan Jackman <jackmanb@...gle.com>, Richard Chang <richardycc@...gle.com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 1/4] mm/page_isolation: make page isolation a
standalone bit.
On 19 May 2025, at 4:08, David Hildenbrand wrote:
> On 09.05.25 22:01, Zi Yan wrote:
>> During page isolation, the original migratetype is overwritten, since
>> MIGRATE_* are enums and stored in pageblock bitmaps. Change
>> MIGRATE_ISOLATE to be stored as a standalone bit, PB_migrate_isolate, like
>> PB_migrate_skip, so that migratetype is not lost during pageblock
>> isolation. Pageblock bits need to be word aligned, so expand
>> the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
>>
>> Signed-off-by: Zi Yan <ziy@...dia.com>
>> ---
>> include/linux/mmzone.h | 15 ++++++++------
>> include/linux/pageblock-flags.h | 9 ++++++++-
>> mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++-
>> mm/page_isolation.c | 11 ++++++++++
>> 4 files changed, 63 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index b19a98c20de8..7ef01fe148ce 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -106,14 +106,17 @@ static inline bool migratetype_is_mergeable(int mt)
>> extern int page_group_by_mobility_disabled;
>> -#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> +#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>> +#else
>> +#define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>> +#endif
>> +
>> +unsigned long get_pageblock_migratetype(const struct page *page);
>> -#define get_pageblock_migratetype(page) \
>> - get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
>> +#define folio_migratetype(folio) \
>> + get_pageblock_migratetype(&folio->page)
>> -#define folio_migratetype(folio) \
>> - get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
>> - MIGRATETYPE_MASK)
>> struct free_area {
>> struct list_head free_list[MIGRATE_TYPES];
>> unsigned long nr_free;
>> diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
>> index 0c4963339f0b..00040e7df8c8 100644
>> --- a/include/linux/pageblock-flags.h
>> +++ b/include/linux/pageblock-flags.h
>> @@ -20,7 +20,10 @@ enum pageblock_bits {
>> PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
>> /* 3 bits required for migrate types */
>> PB_migrate_skip,/* If set the block is skipped by compaction */
>> -
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + PB_migrate_isolate = 7, /* If set the block is isolated */
>> + /* set it to 7 to make pageblock bit word aligned */
>
> I think what we want to do here is align NR_PAGEBLOCK_BITS up to 4 bits at relevant places. Or go to the next power-of-2.
>
> Could we simply do that using something like
>
> #ifdef CONFIG_MEMORY_ISOLATION
> PB_migrate_isolate, /* If set the block is isolated */
> #endif
> __NR_PAGEBLOCK_BITS
> };
>
> /* We always want the bits to be a power of 2. */
> #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
>
>
> Would something like that work?
Yes, it builds and boots on x86_64 for MEMORY_ISOLATION and !MEMORY_ISOLATION.
Will add this change.
>
>> +#endif
>> /*
>> * Assume the bits will always align on a word. If this assumption
>> * changes then get/set pageblock needs updating.
>> @@ -28,6 +31,10 @@ enum pageblock_bits {
>> NR_PAGEBLOCK_BITS
>> };
>>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> +#define PB_migrate_isolate_bit BIT(PB_migrate_isolate)
>> +#endif
>> +
>
> I assume we should first change users of "1 << (PB_migrate_skip)" to PB_migrate_skip_bit to keep it similar.
Will add this.
>
>> #if defined(CONFIG_PAGE_BLOCK_ORDER)
>> #define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
>> #else
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index c77592b22256..04e301fb4879 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -381,10 +381,31 @@ unsigned long get_pfnblock_flags_mask(const struct page *page,
>> return (word >> bitidx) & mask;
>> }
>> +unsigned long get_pageblock_migratetype(const struct page *page)
>> +{
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + MIGRATETYPE_MASK);
>
> When calling functions, we usually indent up to the beginning of the parameters. Same for the other cases below.
OK, will follow this. I was confused, since I see various indentations across files.
There is a .clang-format file, and clang-format does indeed indent parameters as you
said, so I will use clang-format.
>
> ... or just exceed the 80 chars a bit in this case. :)
>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>> + return flags;
>> +}
>> +
>> static __always_inline int get_pfnblock_migratetype(const struct page *page,
>> unsigned long pfn)
>> {
>> - return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, pfn,
>> + MIGRATETYPE_MASK);
>
> This should fit into a single line.
Sure.
>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>
> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
so it gets PB_migrate_isolate_bit.
>
>
> I think what we should do is
>
> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>
> 2) Remove the mask parameter
>
> 3) Perform the masking in all callers.
get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
get PB_migrate_skip. I do not think we want to include PB_migrate_skip
in the mask to confuse readers.
>
>
>
> Maybe, we should convert set_pfnblock_flags_mask() to
>
> void set_clear_pfnblock_flags(struct page *page, unsigned long
> set_flags, unsigned long clear_flags);
>
> And better, splitting it up (or providing helpers)
>
> set_pfnblock_flags(struct page *page, unsigned long flags);
> clear_pfnblock_flags(struct page *page, unsigned long flags);
>
>
> This implies some more code cleanups first that make the code easier to extend.
>
The same concern applies here, due to PB_migrate_skip.
Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
internal APIs by prepending "__". They are only used by the new
{get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
WDYT?
>> + return flags;
>> }
>> /**
>> @@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>> unsigned long bitidx, word_bitidx;
>> unsigned long word;
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
>> + /* extra one for MIGRATE_ISOLATE */
>> + BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
>> +#else
>> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
>> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
>> +#endif
>> bitmap = get_pageblock_bitmap(page, pfn);
>> bitidx = pfn_to_bitidx(page, pfn);
>> @@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>> migratetype < MIGRATE_PCPTYPES))
>> migratetype = MIGRATE_UNMOVABLE;
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (migratetype == MIGRATE_ISOLATE) {
>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>> + page_to_pfn(page), PB_migrate_isolate_bit);
>> + return;
>> + }
>> +#endif
>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>> page_to_pfn(page), MIGRATETYPE_MASK);
>> }
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index b2fc5266e3d2..751e21f6d85e 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -15,6 +15,17 @@
>> #define CREATE_TRACE_POINTS
>> #include <trace/events/page_isolation.h>
>> +static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
>> +{
>> + return get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>> +static inline void clear_pageblock_isolate(struct page *page)
>> +{
>> + set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>
> Should these reside in include/linux/pageblock-flags.h, just like the
> CONFIG_COMPACTION "skip" variants?
They are only used inside mm/page_isolation.c, so I would leave them
here until other users come out.
--
Best Regards,
Yan, Zi
Powered by blists - more mailing lists