linux-kernel - Re: [PATCH 10/10] mm: page_alloc: consolidate free page accounting

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7b3b7f2e-7109-4e72-b1cf-259cb56f3629@linux.alibaba.com>
Date: Sun, 7 Apr 2024 18:19:28 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Johannes Weiner <hannes@...xchg.org>,
 Andrew Morton <akpm@...ux-foundation.org>
Cc: Vlastimil Babka <vbabka@...e.cz>, Mel Gorman
 <mgorman@...hsingularity.net>, Zi Yan <ziy@...dia.com>,
 "Huang, Ying" <ying.huang@...el.com>, David Hildenbrand <david@...hat.com>,
 linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 10/10] mm: page_alloc: consolidate free page accounting



On 2024/3/21 02:02, Johannes Weiner wrote:
> Free page accounting currently happens a bit too high up the call
> stack, where it has to deal with guard pages, compaction capturing,
> block stealing and even page isolation. This is subtle and fragile,
> and makes it difficult to hack on the code.
> 
> Now that type violations on the freelists have been fixed, push the
> accounting down to where pages enter and leave the freelist.
> 
> Signed-off-by: Johannes Weiner <hannes@...xchg.org>
> ---
>   include/linux/mm.h     |  18 ++--
>   include/linux/vmstat.h |   8 --
>   mm/debug_page_alloc.c  |  12 +--
>   mm/internal.h          |   5 --
>   mm/page_alloc.c        | 194 +++++++++++++++++++++++------------------
>   mm/page_isolation.c    |   3 +-
>   6 files changed, 120 insertions(+), 120 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 8147b1302413..bd2e94391c7e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3781,24 +3781,22 @@ static inline bool page_is_guard(struct page *page)
>   	return PageGuard(page);
>   }
>   
> -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype);
> +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
>   static inline bool set_page_guard(struct zone *zone, struct page *page,
> -				  unsigned int order, int migratetype)
> +				  unsigned int order)
>   {
>   	if (!debug_guardpage_enabled())
>   		return false;
> -	return __set_page_guard(zone, page, order, migratetype);
> +	return __set_page_guard(zone, page, order);
>   }
>   
> -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -			int migratetype);
> +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
>   static inline void clear_page_guard(struct zone *zone, struct page *page,
> -				    unsigned int order, int migratetype)
> +				    unsigned int order)
>   {
>   	if (!debug_guardpage_enabled())
>   		return;
> -	__clear_page_guard(zone, page, order, migratetype);
> +	__clear_page_guard(zone, page, order);
>   }
>   
>   #else	/* CONFIG_DEBUG_PAGEALLOC */
> @@ -3808,9 +3806,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
>   static inline bool debug_guardpage_enabled(void) { return false; }
>   static inline bool page_is_guard(struct page *page) { return false; }
>   static inline bool set_page_guard(struct zone *zone, struct page *page,
> -			unsigned int order, int migratetype) { return false; }
> +			unsigned int order) { return false; }
>   static inline void clear_page_guard(struct zone *zone, struct page *page,
> -				unsigned int order, int migratetype) {}
> +				unsigned int order) {}
>   #endif	/* CONFIG_DEBUG_PAGEALLOC */
>   
>   #ifdef __HAVE_ARCH_GATE_AREA
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 343906a98d6e..735eae6e272c 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio,
>   	mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
>   }
>   
> -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
> -					     int migratetype)
> -{
> -	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> -	if (is_migrate_cma(migratetype))
> -		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
> -}
> -
>   extern const char * const vmstat_text[];
>   
>   static inline const char *zone_stat_name(enum zone_stat_item item)
> diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c
> index 6755f0c9d4a3..d46acf989dde 100644
> --- a/mm/debug_page_alloc.c
> +++ b/mm/debug_page_alloc.c
> @@ -32,8 +32,7 @@ static int __init debug_guardpage_minorder_setup(char *buf)
>   }
>   early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
>   
> -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype)
> +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order)
>   {
>   	if (order >= debug_guardpage_minorder())
>   		return false;
> @@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
>   	__SetPageGuard(page);
>   	INIT_LIST_HEAD(&page->buddy_list);
>   	set_page_private(page, order);
> -	/* Guard pages are not available for any usage */
> -	if (!is_migrate_isolate(migratetype))
> -		__mod_zone_freepage_state(zone, -(1 << order), migratetype);
>   
>   	return true;
>   }
>   
> -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
> -		      int migratetype)
> +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order)
>   {
>   	__ClearPageGuard(page);
> -
>   	set_page_private(page, 0);
> -	if (!is_migrate_isolate(migratetype))
> -		__mod_zone_freepage_state(zone, (1 << order), migratetype);
>   }
> diff --git a/mm/internal.h b/mm/internal.h
> index d6e6c7d9f04e..0a4007b03d0d 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1036,11 +1036,6 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype)
>   	return migratetype == MIGRATE_HIGHATOMIC;
>   }
>   
> -static inline bool is_migrate_highatomic_page(struct page *page)
> -{
> -	return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
> -}
> -
>   void setup_zone_pageset(struct zone *zone);
>   
>   struct migration_target_control {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index efb2581ac142..c46491f83ac2 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -642,42 +642,72 @@ compaction_capture(struct capture_control *capc, struct page *page,
>   }
>   #endif /* CONFIG_COMPACTION */
>   
> -/* Used for pages not on another list */
> -static inline void add_to_free_list(struct page *page, struct zone *zone,
> -				    unsigned int order, int migratetype)
> +static inline void account_freepages(struct page *page, struct zone *zone,
> +				     int nr_pages, int migratetype)
>   {
> -	struct free_area *area = &zone->free_area[order];
> +	if (is_migrate_isolate(migratetype))
> +		return;
>   
> -	list_add(&page->buddy_list, &area->free_list[migratetype]);
> -	area->nr_free++;
> +	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> +
> +	if (is_migrate_cma(migratetype))
> +		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
>   }
>   
>   /* Used for pages not on another list */
> -static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
> -					 unsigned int order, int migratetype)
> +static inline void __add_to_free_list(struct page *page, struct zone *zone,
> +				      unsigned int order, int migratetype,
> +				      bool tail)
>   {
>   	struct free_area *area = &zone->free_area[order];
>   
> -	list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), migratetype, 1 << order);
> +
> +	if (tail)
> +		list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	else
> +		list_add(&page->buddy_list, &area->free_list[migratetype]);
>   	area->nr_free++;
>   }
>   
> +static inline void add_to_free_list(struct page *page, struct zone *zone,
> +				    unsigned int order, int migratetype,
> +				    bool tail)
> +{
> +	__add_to_free_list(page, zone, order, migratetype, tail);
> +	account_freepages(page, zone, 1 << order, migratetype);
> +}
> +
>   /*
>    * Used for pages which are on another list. Move the pages to the tail
>    * of the list - so the moved pages won't immediately be considered for
>    * allocation again (e.g., optimization for memory onlining).
>    */
>   static inline void move_to_free_list(struct page *page, struct zone *zone,
> -				     unsigned int order, int migratetype)
> +				     unsigned int order, int old_mt, int new_mt)
>   {
>   	struct free_area *area = &zone->free_area[order];
>   
> -	list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
> +	/* Free page moving can fail, so it happens before the type update */
> +	VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), old_mt, 1 << order);
> +
> +	list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
> +
> +	account_freepages(page, zone, -(1 << order), old_mt);
> +	account_freepages(page, zone, 1 << order, new_mt);
>   }
>   
> -static inline void del_page_from_free_list(struct page *page, struct zone *zone,
> -					   unsigned int order)
> +static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
> +					     unsigned int order, int migratetype)
>   {
> +        VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
> +		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
> +		     get_pageblock_migratetype(page), migratetype, 1 << order);
> +
>   	/* clear reported state and update reported page count */
>   	if (page_reported(page))
>   		__ClearPageReported(page);
> @@ -688,6 +718,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
>   	zone->free_area[order].nr_free--;
>   }
>   
> +static inline void del_page_from_free_list(struct page *page, struct zone *zone,
> +					   unsigned int order, int migratetype)
> +{
> +	__del_page_from_free_list(page, zone, order, migratetype);
> +	account_freepages(page, zone, -(1 << order), migratetype);
> +}
> +
>   static inline struct page *get_page_from_free_area(struct free_area *area,
>   					    int migratetype)
>   {
> @@ -759,18 +796,16 @@ static inline void __free_one_page(struct page *page,
>   	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
>   
>   	VM_BUG_ON(migratetype == -1);
> -	if (likely(!is_migrate_isolate(migratetype)))
> -		__mod_zone_freepage_state(zone, 1 << order, migratetype);
> -
>   	VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
>   	VM_BUG_ON_PAGE(bad_range(zone, page), page);
>   
> +	account_freepages(page, zone, 1 << order, migratetype);
> +
>   	while (order < MAX_PAGE_ORDER) {
> -		if (compaction_capture(capc, page, order, migratetype)) {
> -			__mod_zone_freepage_state(zone, -(1 << order),
> -								migratetype);
> +		int buddy_mt = migratetype;
> +
> +		if (compaction_capture(capc, page, order, migratetype))
>   			return;
> -		}

IIUC, if the released page is captured by compaction, then the 
statistics for free pages should be correspondingly decreased, 
otherwise, there will be a slight regression for my thpcompact benchmark.

thpcompact Percentage Faults Huge
                           k6.9-rc2-base        base + patch10 + 2 fixes	
Percentage huge-1        78.18 (   0.00%)       71.92 (  -8.01%)
Percentage huge-3        86.70 (   0.00%)       86.07 (  -0.73%)
Percentage huge-5        90.26 (   0.00%)       78.02 ( -13.57%)
Percentage huge-7        92.34 (   0.00%)       78.67 ( -14.81%)
Percentage huge-12       91.18 (   0.00%)       81.04 ( -11.12%)
Percentage huge-18       89.00 (   0.00%)       79.57 ( -10.60%)
Percentage huge-24       90.52 (   0.00%)       80.07 ( -11.54%)
Percentage huge-30       94.44 (   0.00%)       96.28 (   1.95%)
Percentage huge-32       93.09 (   0.00%)       99.39 (   6.77%)

I add below fix based on your fix 2, then the thpcompact Percentage 
looks good. How do you think for the fix?

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8330c5c2de6b..2facf844ef84 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -805,8 +805,10 @@ static inline void __free_one_page(struct page *page,
         while (order < MAX_PAGE_ORDER) {
                 int buddy_mt = migratetype;

-               if (compaction_capture(capc, page, order, migratetype))
+               if (compaction_capture(capc, page, order, migratetype)) {
+                       account_freepages(zone, -(1 << order), migratetype);
                         return;
+               }

                 buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
                 if (!buddy)

With my fix, the THP percentage looks better:
                       k6.9-rc2-base          base + patch10 + 2 fixes	+ 
my fix
Percentage huge-1        78.18 (   0.00%)       82.83 (   5.94%)
Percentage huge-3        86.70 (   0.00%)       93.47 (   7.81%)
Percentage huge-5        90.26 (   0.00%)       94.73 (   4.95%)
Percentage huge-7        92.34 (   0.00%)       95.22 (   3.12%)
Percentage huge-12       91.18 (   0.00%)       92.40 (   1.34%)
Percentage huge-18       89.00 (   0.00%)       85.39 (  -4.06%)
Percentage huge-24       90.52 (   0.00%)       94.70 (   4.61%)
Percentage huge-30       94.44 (   0.00%)       97.00 (   2.71%)
Percentage huge-32       93.09 (   0.00%)       92.87 (  -0.24%)