linux-kernel - Re: [PATCH 2/2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210603123437.GW30378@techsingularity.net>
Date:   Thu, 3 Jun 2021 13:34:37 +0100
From:   Mel Gorman <mgorman@...hsingularity.net>
To:     Vlastimil Babka <vbabka@...e.cz>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Michal Hocko <mhocko@...nel.org>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        LKML <linux-kernel@...r.kernel.org>,
        Linux-MM <linux-mm@...ck.org>
Subject: Re: [PATCH 2/2] mm/page_alloc: Allow high-order pages to be stored
 on the per-cpu lists

On Thu, Jun 03, 2021 at 01:12:10PM +0200, Vlastimil Babka wrote:
> > From a functional point of view, a patch like this is necessary to
> > make bulk allocation of high-order pages work with similar performance
> > to order-0 bulk allocations. The bulk allocator is not updated in this
> > series as it would have to be determined by bulk allocation users how
> > they want to track the order of pages allocated with the bulk allocator.
> > 
> > Signed-off-by: Mel Gorman <mgorman@...hsingularity.net>
> 
> Acked-by: Vlastimil Babka <vbabka@...e.cz>
> 

Thanks!

> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 0ed61f32d898..1ceaa5f44db6 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -333,6 +333,24 @@ enum zone_watermarks {
> >  	NR_WMARK
> >  };
> >  
> > +/*
> > + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
> > + * for pageblock size for THP if configured.
> > + */
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +#define NR_PCP_THP 1
> > +#else
> > +#define NR_PCP_THP 0
> > +#endif
> > +#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
> > +
> > +/*
> > + * Shift to encode migratetype in order in the least significant bits and
> > + * migratetype in the higher bits.
> 
> Hard for me to understand that comment. I would describe what the code does as
> e,g, "Shift to encode migratetype and order in the same integer, with order in
> the least significant bit ..." etc.

That comment is a lot better :)

> 
> > + */
> > +#define NR_PCP_ORDER_SHIFT 8
> 
> Also ORDER_SHIFT is a bit misnomer, it's more precisely an ORDER_WIDTH, and we
> are shifting migratetype with it, not order. I'm just comparing with how we name
> nid/zid/etc bits in page flags.
> 

Ok, I can change that.

> > @@ -687,10 +687,53 @@ static void bad_page(struct page *page, const char *reason)
> >  	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> >  }
> >  
> > +static inline unsigned int order_to_pindex(int migratetype, int order)
> > +{
> > +	int base = order;
> > +
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +	if (order > PAGE_ALLOC_COSTLY_ORDER) {
> > +		VM_BUG_ON(order != pageblock_order);
> > +		base = PAGE_ALLOC_COSTLY_ORDER + 1;
> > +	}
> > +#else
> > +	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
> > +#endif
> > +
> > +	return (MIGRATE_PCPTYPES * base) + migratetype;
> > +}
> > +
> > +static inline int pindex_to_order(unsigned int pindex)
> > +{
> > +	int order = pindex / PAGE_ALLOC_COSTLY_ORDER;
> 
> This seems wrong, shouldn't we divide by MIGRATE_PCPTYPES?
> It just happens to be the same number, so testing won't flag this.
> 

Whoops, yes, basic algebra says you're right so well spotted. If
PAGE_ALLOC_COSTLY_ORDER ever changed for some reason, this would blow up.

So this?


diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bbe05289b121..f1bed5b847ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -345,11 +345,11 @@ enum zone_watermarks {
 #define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
 
 /*
- * Shift to encode migratetype in order in the least significant bits and
- * migratetype in the higher bits.
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
  */
-#define NR_PCP_ORDER_SHIFT 8
-#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_SHIFT) - 1)
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
 
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 49f3c7dcdfca..7be71f9ad9f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,7 +694,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
 
 static inline int pindex_to_order(unsigned int pindex)
 {
-	int order = pindex / PAGE_ALLOC_COSTLY_ORDER;
+	int order = pindex / MIGRATE_PCPTYPES;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (order > PAGE_ALLOC_COSTLY_ORDER) {
@@ -1487,7 +1487,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = count;
 
 		order = pindex_to_order(pindex);
-		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_SHIFT));
+		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
 		do {
 			page = list_last_entry(list, struct page, lru);
 			/* must delete to avoid corrupting pcp list */
@@ -1499,7 +1499,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 				continue;
 
 			/* Encode order with the migratetype */
-			page->index <<= NR_PCP_ORDER_SHIFT;
+			page->index <<= NR_PCP_ORDER_WIDTH;
 			page->index |= order;
 
 			list_add_tail(&page->lru, &head);
@@ -1537,7 +1537,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 
 		/* mt has been encoded with the order (see above) */
 		order = mt & NR_PCP_ORDER_MASK;
-		mt >>= NR_PCP_ORDER_SHIFT;
+		mt >>= NR_PCP_ORDER_WIDTH;
 
 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);



-- 
Mel Gorman
SUSE Labs