Message-ID: <a0879316-31de-4fec-ad1f-caabbfff2e48@suse.cz>
Date: Tue, 26 Mar 2024 12:28:37 +0100
From: Vlastimil Babka <vbabka@...e.cz>
To: Johannes Weiner <hannes@...xchg.org>,
 Andrew Morton <akpm@...ux-foundation.org>
Cc: Mel Gorman <mgorman@...hsingularity.net>, Zi Yan <ziy@...dia.com>,
 "Huang, Ying" <ying.huang@...el.com>, David Hildenbrand <david@...hat.com>,
 linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 06/10] mm: page_alloc: fix freelist movement during block
 conversion

On 3/20/24 7:02 PM, Johannes Weiner wrote:
> Currently, page block type conversion during fallbacks, atomic
> reservations and isolation can strand various amounts of free pages on
> incorrect freelists.
> 
> For example, fallback stealing moves free pages in the block to the
> new type's freelists, but then may not actually claim the block for
> that type if there aren't enough compatible pages already allocated.
> 
> In all cases, free page moving might fail if the block straddles more
> than one zone, in which case no free pages are moved at all, but the
> block type is changed anyway.
> 
> This is detrimental to type hygiene on the freelists. It encourages
> incompatible page mixing down the line (ask for one type, get another)
> and thus contributes to long-term fragmentation.
> 
> Split the process into a proper transaction: check first if conversion
> will happen, then try to move the free pages, and only if that was
> successful convert the block to the new type.
> 
> Tested-by: "Huang, Ying" <ying.huang@...el.com>
> Signed-off-by: Johannes Weiner <hannes@...xchg.org>

Reviewed-by: Vlastimil Babka <vbabka@...e.cz>
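
FWIW, the "check first, then move, then convert" ordering from the
changelog boils down to something like the sketch below. This is only an
illustration, not the real code; claim_block(), can_claim(),
move_free_pages() and struct fallback_block are made-up stand-ins for the
actual page allocator helpers:

	struct fallback_block {
		int migratetype;
	};

	static bool can_claim(struct fallback_block *blk, int new_type);
	static bool move_free_pages(struct fallback_block *blk, int new_type);

	static bool claim_block(struct fallback_block *blk, int new_type)
	{
		/* 1. Decide up front whether conversion should happen at all. */
		if (!can_claim(blk, new_type))
			return false;

		/*
		 * 2. Try to move the free pages; this can fail, e.g. when the
		 *    block straddles a zone boundary.
		 */
		if (!move_free_pages(blk, new_type))
			return false;

		/* 3. Only after a successful move is the block type changed. */
		blk->migratetype = new_type;
		return true;
	}

The point being that the migratetype change is the last step and is skipped
entirely whenever the move fails.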

Nit below:

> @@ -1743,33 +1770,37 @@ static inline bool boost_watermark(struct zone *zone)
>  }
>  
>  /*
> - * This function implements actual steal behaviour. If order is large enough,
> - * we can steal whole pageblock. If not, we first move freepages in this
> - * pageblock to our migratetype and determine how many already-allocated pages
> - * are there in the pageblock with a compatible migratetype. If at least half
> - * of pages are free or compatible, we can change migratetype of the pageblock
> - * itself, so pages freed in the future will be put on the correct free list.
> + * This function implements actual steal behaviour. If order is large enough, we
> + * can claim the whole pageblock for the requested migratetype. If not, we check
> + * the pageblock for constituent pages; if at least half of the pages are free
> + * or compatible, we can still claim the whole block, so pages freed in the
> + * future will be put on the correct free list. Otherwise, we isolate exactly
> + * the order we need from the fallback block and leave its migratetype alone.
>   */
> -static void steal_suitable_fallback(struct zone *zone, struct page *page,
> -		unsigned int alloc_flags, int start_type, bool whole_block)
> +static struct page *
> +steal_suitable_fallback(struct zone *zone, struct page *page,
> +			int current_order, int order, int start_type,
> +			unsigned int alloc_flags, bool whole_block)
>  {
> -	unsigned int current_order = buddy_order(page);
>  	int free_pages, movable_pages, alike_pages;
> -	int old_block_type;
> +	unsigned long start_pfn, end_pfn;
> +	int block_type;
>  
> -	old_block_type = get_pageblock_migratetype(page);
> +	block_type = get_pageblock_migratetype(page);
>  
>  	/*
>  	 * This can happen due to races and we want to prevent broken
>  	 * highatomic accounting.
>  	 */
> -	if (is_migrate_highatomic(old_block_type))
> +	if (is_migrate_highatomic(block_type))
>  		goto single_page;
>  
>  	/* Take ownership for orders >= pageblock_order */
>  	if (current_order >= pageblock_order) {
> +		del_page_from_free_list(page, zone, current_order);
>  		change_pageblock_range(page, current_order, start_type);
> -		goto single_page;
> +		expand(zone, page, order, current_order, start_type);
> +		return page;

Is the exact order here important (AFAIK it shouldn't be), or could we just
do change_pageblock_range(); block_type = start_type; goto single_page?
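
That is, something like this (just spelling out the suggestion, untested):

	/* Take ownership for orders >= pageblock_order */
	if (current_order >= pageblock_order) {
		change_pageblock_range(page, current_order, start_type);
		block_type = start_type;
		goto single_page;
	}

so the single_page path would then do the del_page_from_free_list() +
expand() with block_type already set to start_type.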

>  	}
>  
>  	/*
> @@ -1784,10 +1815,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
>  	if (!whole_block)
>  		goto single_page;
>  
> -	free_pages = move_freepages_block(zone, page, start_type,
> -						&movable_pages);
>  	/* moving whole block can fail due to zone boundary conditions */
> -	if (!free_pages)
> +	if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn,
> +				       &free_pages, &movable_pages))
>  		goto single_page;
>  
>  	/*
> @@ -1805,7 +1835,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
>  		 * vice versa, be conservative since we can't distinguish the
>  		 * exact migratetype of non-movable pages.
>  		 */
> -		if (old_block_type == MIGRATE_MOVABLE)
> +		if (block_type == MIGRATE_MOVABLE)
>  			alike_pages = pageblock_nr_pages
>  						- (free_pages + movable_pages);
>  		else
> @@ -1816,13 +1846,16 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
>  	 * compatible migratability as our allocation, claim the whole block.
>  	 */
>  	if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
> -			page_group_by_mobility_disabled)
> +			page_group_by_mobility_disabled) {
> +		move_freepages(zone, start_pfn, end_pfn, start_type);
>  		set_pageblock_migratetype(page, start_type);
> -
> -	return;
> +		return __rmqueue_smallest(zone, order, start_type);
> +	}
>  
>  single_page:
> -	move_to_free_list(page, zone, current_order, start_type);
> +	del_page_from_free_list(page, zone, current_order);
> +	expand(zone, page, order, current_order, block_type);
> +	return page;
>  }

