[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4a6ae9fc-a52b-4300-0edb-a0f4169c314a@suse.cz>
Date: Thu, 31 Jan 2019 14:55:01 +0100
From: Vlastimil Babka <vbabka@...e.cz>
To: Mel Gorman <mgorman@...hsingularity.net>,
Andrew Morton <akpm@...ux-foundation.org>
Cc: David Rientjes <rientjes@...gle.com>,
Andrea Arcangeli <aarcange@...hat.com>,
Linux List Kernel Mailing <linux-kernel@...r.kernel.org>,
Linux-MM <linux-mm@...ck.org>
Subject: Re: [PATCH 09/22] mm, compaction: Use free lists to quickly locate a
migration source
On 1/18/19 6:51 PM, Mel Gorman wrote:
...
> + for (order = cc->order - 1;
> + order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
> + order--) {
> + struct free_area *area = &cc->zone->free_area[order];
> + struct list_head *freelist;
> + unsigned long flags;
> + struct page *freepage;
> +
> + if (!area->nr_free)
> + continue;
> +
> + spin_lock_irqsave(&cc->zone->lock, flags);
> + freelist = &area->free_list[MIGRATE_MOVABLE];
> + list_for_each_entry(freepage, freelist, lru) {
> + unsigned long free_pfn;
> +
> + nr_scanned++;
> + free_pfn = page_to_pfn(freepage);
> + if (free_pfn < high_pfn) {
> + update_fast_start_pfn(cc, free_pfn);
Shouldn't this update go below the pageblock skip bit check? Otherwise we
might be caching pageblocks that will be skipped, and also potentially
going backwards from the original cc->migrate_pfn, which could perhaps
explain the reported kcompactd loops?
> +
> + /*
> + * Avoid if skipped recently. Ideally it would
> + * move to the tail but even safe iteration of
> + * the list assumes an entry is deleted, not
> + * reordered.
> + */
> + if (get_pageblock_skip(freepage)) {
> + if (list_is_last(freelist, &freepage->lru))
> + break;
> +
> + continue;
> + }
> +
> + /* Reorder to so a future search skips recent pages */
> + move_freelist_tail(freelist, freepage);
> +
> + pfn = pageblock_start_pfn(free_pfn);
> + cc->fast_search_fail = 0;
> + set_pageblock_skip(freepage);
> + break;
> + }
> +
> + if (nr_scanned >= limit) {
> + cc->fast_search_fail++;
> + move_freelist_tail(freelist, freepage);
> + break;
> + }
> + }
> + spin_unlock_irqrestore(&cc->zone->lock, flags);
> + }
> +
> + cc->total_migrate_scanned += nr_scanned;
> +
> + /*
> + * If fast scanning failed then use a cached entry for a page block
> + * that had free pages as the basis for starting a linear scan.
> + */
> + if (pfn == cc->migrate_pfn)
> + reinit_migrate_pfn(cc);
This will set cc->migrate_pfn to the lowest pfn encountered, yet return
pfn, which still holds the original cc->migrate_pfn value.
AFAICS isolate_migratepages() will use the returned pfn for the linear
scan and then overwrite cc->migrate_pfn with wherever it advanced from
there. So whatever we stored here into cc->migrate_pfn will never
actually get used, except when isolate_migratepages() returns with
ISOLATED_ABORT.
So maybe the infinite kcompactd loop is linked to ISOLATED_ABORT?
> +
> + return pfn;
> +}
> +
> /*
> * Isolate all pages that can be migrated from the first suitable block,
> * starting at the block pointed to by the migrate scanner pfn within
> @@ -1222,16 +1381,25 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
> const isolate_mode_t isolate_mode =
> (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
> (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
> + bool fast_find_block;
>
> /*
> * Start at where we last stopped, or beginning of the zone as
> - * initialized by compact_zone()
> + * initialized by compact_zone(). The first failure will use
> + * the lowest PFN as the starting point for linear scanning.
> */
> - low_pfn = cc->migrate_pfn;
> + low_pfn = fast_find_migrateblock(cc);
> block_start_pfn = pageblock_start_pfn(low_pfn);
> if (block_start_pfn < zone->zone_start_pfn)
> block_start_pfn = zone->zone_start_pfn;
>
> + /*
> + * fast_find_migrateblock marks a pageblock skipped so to avoid
> + * the isolation_suitable check below, check whether the fast
> + * search was successful.
> + */
> + fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
> +
> /* Only scan within a pageblock boundary */
> block_end_pfn = pageblock_end_pfn(low_pfn);
>
> @@ -1240,6 +1408,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
> * Do not cross the free scanner.
> */
> for (; block_end_pfn <= cc->free_pfn;
> + fast_find_block = false,
> low_pfn = block_end_pfn,
> block_start_pfn = block_end_pfn,
> block_end_pfn += pageblock_nr_pages) {
> @@ -1259,7 +1428,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
> continue;
>
> /* If isolation recently failed, do not retry */
> - if (!isolation_suitable(cc, page))
> + if (!isolation_suitable(cc, page) && !fast_find_block)
> continue;
>
> /*
> @@ -1550,6 +1719,7 @@ static enum compact_result compact_zone(struct compact_control *cc)
> * want to compact the whole zone), but check that it is initialised
> * by ensuring the values are within zone boundaries.
> */
> + cc->fast_start_pfn = 0;
> if (cc->whole_zone) {
> cc->migrate_pfn = start_pfn;
> cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
> diff --git a/mm/internal.h b/mm/internal.h
> index 9b32f4cab0ae..983cb975545f 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -188,9 +188,11 @@ struct compact_control {
> unsigned int nr_migratepages; /* Number of pages to migrate */
> unsigned long free_pfn; /* isolate_freepages search base */
> unsigned long migrate_pfn; /* isolate_migratepages search base */
> + unsigned long fast_start_pfn; /* a pfn to start linear scan from */
> struct zone *zone;
> unsigned long total_migrate_scanned;
> unsigned long total_free_scanned;
> + unsigned int fast_search_fail; /* failures to use free list searches */
> const gfp_t gfp_mask; /* gfp mask of a direct compactor */
> int order; /* order a direct compactor needs */
> int migratetype; /* migratetype of direct compactor */
>
Powered by blists - more mailing lists