linux-kernel - [PATCH 6/6] mm/page_alloc: Reduce duration that IRQs are disabled for VM counters

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <20210329120648.19040-7-mgorman@techsingularity.net>
Date:   Mon, 29 Mar 2021 13:06:48 +0100
From:   Mel Gorman <mgorman@...hsingularity.net>
To:     Linux-MM <linux-mm@...ck.org>
Cc:     Linux-RT-Users <linux-rt-users@...r.kernel.org>,
        LKML <linux-kernel@...r.kernel.org>,
        Chuck Lever <chuck.lever@...cle.com>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        Matthew Wilcox <willy@...radead.org>,
        Mel Gorman <mgorman@...hsingularity.net>
Subject: [PATCH 6/6] mm/page_alloc: Reduce duration that IRQs are disabled for VM counters

IRQs are left disabled for the zone and node VM event counters. On some
architectures this is unnecessary and it confuses what the scope of the
locking for per-cpu lists and VM counters is.

This patch reduces the scope of IRQs being disabled via local_[lock|unlock]
and relies on disabling preemption for the per-cpu counters. This
is not completely free on all architectures, as those
without HAVE_CMPXCHG_DOUBLE will disable/enable IRQs again for the
mod_zone_freepage_state call. However, it clarifies what the per-cpu
pages lock protects and how zone stats may need IRQs disabled if ever
called from an IRQ context.

Signed-off-by: Mel Gorman <mgorman@...hsingularity.net>
---
 mm/page_alloc.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 32c64839c145..25d9351e75d8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3461,11 +3461,17 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 	if (page) {
+		/*
+		 * per-cpu counter updates are not preempt-safe but it is
+		 * acceptable to race versus interrupts.
+		 */
+		preempt_disable();
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone, 1);
+		preempt_enable();
 	}
-	local_unlock_irqrestore(&pagesets.lock, flags);
 	return page;
 }
 
@@ -3517,15 +3523,17 @@ struct page *rmqueue(struct zone *preferred_zone,
 		if (!page)
 			page = __rmqueue(zone, order, migratetype, alloc_flags);
 	} while (page && check_new_pages(page, order));
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
+
 	if (!page)
 		goto failed;
+
+	preempt_disable();
 	__mod_zone_freepage_state(zone, -(1 << order),
 				  get_pcppage_migratetype(page));
-
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 	zone_statistics(preferred_zone, zone, 1);
-	local_irq_restore(flags);
+	preempt_enable();
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -5090,10 +5098,12 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
+	local_unlock_irqrestore(&pagesets.lock, flags);
+
+	preempt_disable();
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
 	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
-
-	local_unlock_irqrestore(&pagesets.lock, flags);
+	preempt_enable();
 
 	return nr_populated;
 
-- 
2.26.2