linux-kernel - Re: [PATCH 5/6] mm/page_alloc: Free pages in a single pass during bulk free

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <Yg83rppnpmXPbln4@ziqianlu-nuc9qn>
Date:   Fri, 18 Feb 2022 14:07:42 +0800
From:   Aaron Lu <aaron.lu@...el.com>
To:     Mel Gorman <mgorman@...hsingularity.net>
CC:     Andrew Morton <akpm@...ux-foundation.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Michal Hocko <mhocko@...nel.org>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        LKML <linux-kernel@...r.kernel.org>,
        Linux-MM <linux-mm@...ck.org>
Subject: Re: [PATCH 5/6] mm/page_alloc: Free pages in a single pass during
 bulk free

On Thu, Feb 17, 2022 at 12:22:26AM +0000, Mel Gorman wrote:

... ...

> ---
>  mm/page_alloc.c | 56 +++++++++++++++++++------------------------------
>  1 file changed, 21 insertions(+), 35 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 635a4e0f70b4..68e2132717c5 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1455,8 +1455,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  	unsigned int order;
>  	int prefetch_nr = READ_ONCE(pcp->batch);
>  	bool isolated_pageblocks;
> -	struct page *page, *tmp;
> -	LIST_HEAD(head);
> +	struct page *page;
>  
>  	/*
>  	 * Ensure proper count is passed which otherwise would stuck in the
> @@ -1467,6 +1466,13 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  	/* Ensure requested pindex is drained first. */
>  	pindex = pindex - 1;
>  
> +	/*
> +	 * local_lock_irq held so equivalent to spin_lock_irqsave for
> +	 * both PREEMPT_RT and non-PREEMPT_RT configurations.
> +	 */
> +	spin_lock(&zone->lock);
> +	isolated_pageblocks = has_isolate_pageblock(zone);
> +
>  	while (count > 0) {
>  		struct list_head *list;
>  		int nr_pages;
> @@ -1489,7 +1495,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  		nr_pages = 1 << order;
>  		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
>  		do {
> +			int mt;
> +
>  			page = list_last_entry(list, struct page, lru);
> +			mt = get_pcppage_migratetype(page);
> +
>  			/* must delete to avoid corrupting pcp list */
>  			list_del(&page->lru);
>  			count -= nr_pages;
> @@ -1498,12 +1508,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  			if (bulkfree_pcp_prepare(page))
>  				continue;
>  
> -			/* Encode order with the migratetype */
> -			page->index <<= NR_PCP_ORDER_WIDTH;
> -			page->index |= order;
> -
> -			list_add_tail(&page->lru, &head);
> -
>  			/*
>  			 * We are going to put the page back to the global
>  			 * pool, prefetch its buddy to speed up later access
> @@ -1517,36 +1521,18 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  				prefetch_buddy(page, order);
>  				prefetch_nr--;
>  			}

The comment above 'if (prefetch_nr)', which says "We are going to put the
page back to the global pool, prefetch its buddy to speed up later access
under zone->lock...", will have to be modified, as the prefetch is now
done inside the lock.

As I remember, prefetch_buddy()'s original intent was to fetch the buddy
page's 'struct page' before acquiring the zone lock, to speed up
operations inside the locked region. Now that the zone lock is acquired
early, whether prefetch_buddy() should still be kept inside the lock
becomes questionable.

After the nr_task=4/16/64 tests finish, I'll also test the effect of
removing prefetch_buddy() here.

Thanks,
Aaron

> -		} while (count > 0 && !list_empty(list));
> -	}
>  
> -	/*
> -	 * local_lock_irq held so equivalent to spin_lock_irqsave for
> -	 * both PREEMPT_RT and non-PREEMPT_RT configurations.
> -	 */
> -	spin_lock(&zone->lock);
> -	isolated_pageblocks = has_isolate_pageblock(zone);
> +			/* MIGRATE_ISOLATE page should not go to pcplists */
> +			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
> +			/* Pageblock could have been isolated meanwhile */
> +			if (unlikely(isolated_pageblocks))
> +				mt = get_pageblock_migratetype(page);
>  
> -	/*
> -	 * Use safe version since after __free_one_page(),
> -	 * page->lru.next will not point to original list.
> -	 */
> -	list_for_each_entry_safe(page, tmp, &head, lru) {
> -		int mt = get_pcppage_migratetype(page);
> -
> -		/* mt has been encoded with the order (see above) */
> -		order = mt & NR_PCP_ORDER_MASK;
> -		mt >>= NR_PCP_ORDER_WIDTH;
> -
> -		/* MIGRATE_ISOLATE page should not go to pcplists */
> -		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
> -		/* Pageblock could have been isolated meanwhile */
> -		if (unlikely(isolated_pageblocks))
> -			mt = get_pageblock_migratetype(page);
> -
> -		__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
> -		trace_mm_page_pcpu_drain(page, order, mt);
> +			__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
> +			trace_mm_page_pcpu_drain(page, order, mt);
> +		} while (count > 0 && !list_empty(list));
>  	}
> +
>  	spin_unlock(&zone->lock);
>  }
>  
> -- 
> 2.31.1
>