Message-Id: <20250220052027.58847-23-byungchul@sk.com>
Date: Thu, 20 Feb 2025 14:20:23 +0900
From: Byungchul Park <byungchul@...com>
To: linux-kernel@...r.kernel.org,
	linux-mm@...ck.org
Cc: kernel_team@...ynix.com,
	akpm@...ux-foundation.org,
	ying.huang@...el.com,
	vernhao@...cent.com,
	mgorman@...hsingularity.net,
	hughd@...gle.com,
	willy@...radead.org,
	david@...hat.com,
	peterz@...radead.org,
	luto@...nel.org,
	tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	dave.hansen@...ux.intel.com,
	rjgolo@...il.com
Subject: [RFC PATCH v12 22/26] mm/page_alloc: do not allow tlb shootdown if !preemptible() && non_luf_pages_ok()

To avoid hurting preemptibility, do not perform a tlb shootdown when the
context is not preemptible and there are already enough non-luf pages
available.
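
For reference, the decision this change makes can be modeled outside the
kernel.  The following is a minimal userspace C sketch, not kernel code:
enough_non_luf_pages, preemptible_ctx, in_task_ctx and irqs_disabled_ctx
are hypothetical stand-ins for non_luf_pages_ok(), preemptible(),
in_task() and irqs_disabled(); only the control flow mirrors the patched
no_shootdown_context().

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace model of the patched no_shootdown_context() decision.
 * All predicates are stand-ins for the kernel helpers of the same
 * purpose; only the control flow mirrors the patch.
 */
static bool no_shootdown_context_model(bool have_zone,
				       bool enough_non_luf_pages,
				       bool preemptible_ctx,
				       bool in_task_ctx,
				       bool irqs_disabled_ctx)
{
	/*
	 * With enough non-luf pages in the zone, be stricter: skip the
	 * shootdown whenever the context is not preemptible, so that
	 * preemptibility is not hurt.
	 */
	if (have_zone && enough_non_luf_pages)
		return !(preemptible_ctx && in_task_ctx);

	/* Otherwise, only avoid the shootdown when it could deadlock. */
	return !(!irqs_disabled_ctx && in_task_ctx);
}

int main(void)
{
	/* Non-preemptible task context, enough non-luf pages: skip shootdown (prints 1). */
	printf("%d\n", no_shootdown_context_model(true, true, false, true, false));
	/* Same context but under memory pressure: shootdown is allowed again (prints 0). */
	printf("%d\n", no_shootdown_context_model(true, false, false, true, false));
	return 0;
}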

Signed-off-by: Byungchul Park <byungchul@...com>
---
 mm/compaction.c     |  6 +++---
 mm/internal.h       |  5 +++--
 mm/page_alloc.c     | 27 +++++++++++++++------------
 mm/page_isolation.c |  2 +-
 mm/page_reporting.c |  4 ++--
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index a7f17867decae..8fa9de6db2441 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -605,7 +605,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
 	page = pfn_to_page(blockpfn);
 
-	luf_takeoff_start();
+	luf_takeoff_start(cc->zone);
 	/* Isolate free pages. */
 	for (; blockpfn < end_pfn; blockpfn += stride, page += stride) {
 		int isolated;
@@ -1601,7 +1601,7 @@ static void fast_isolate_freepages(struct compact_control *cc)
 		if (!area->nr_free)
 			continue;
 
-		can_shootdown = luf_takeoff_start();
+		can_shootdown = luf_takeoff_start(cc->zone);
 		spin_lock_irqsave(&cc->zone->lock, flags);
 		freelist = &area->free_list[MIGRATE_MOVABLE];
 retry:
@@ -2413,7 +2413,7 @@ static enum compact_result compact_finished(struct compact_control *cc)
 	 * luf_takeoff_{start,end}() is required to identify whether
 	 * this compaction context is tlb shootdownable for luf'd pages.
 	 */
-	luf_takeoff_start();
+	luf_takeoff_start(cc->zone);
 	ret = __compact_finished(cc);
 	luf_takeoff_end(cc->zone);
 
diff --git a/mm/internal.h b/mm/internal.h
index e634eaf220f00..fba19c283ac48 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1594,7 +1594,7 @@ static inline void accept_page(struct page *page)
 #endif /* CONFIG_UNACCEPTED_MEMORY */
 #if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH)
 extern struct luf_batch luf_batch[];
-bool luf_takeoff_start(void);
+bool luf_takeoff_start(struct zone *zone);
 void luf_takeoff_end(struct zone *zone);
 bool luf_takeoff_no_shootdown(void);
 bool luf_takeoff_check(struct zone *zone, struct page *page);
@@ -1608,6 +1608,7 @@ static inline bool non_luf_pages_ok(struct zone *zone)
 
 	return nr_free - nr_luf_pages > min_wm;
 }
+
 unsigned short fold_unmap_luf(void);
 
 /*
@@ -1694,7 +1695,7 @@ static inline bool can_luf_vma(struct vm_area_struct *vma)
 	return true;
 }
 #else /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-static inline bool luf_takeoff_start(void) { return false; }
+static inline bool luf_takeoff_start(struct zone *zone) { return false; }
 static inline void luf_takeoff_end(struct zone *zone) {}
 static inline bool luf_takeoff_no_shootdown(void) { return true; }
 static inline bool luf_takeoff_check(struct zone *zone, struct page *page) { return true; }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b81931c6f2cfd..ccbe49b78190a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -623,22 +623,25 @@ compaction_capture(struct capture_control *capc, struct page *page,
 #endif /* CONFIG_COMPACTION */
 
 #if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH)
-static bool no_shootdown_context(void)
+static bool no_shootdown_context(struct zone *zone)
 {
 	/*
-	 * If it performs with irq disabled, that might cause a deadlock.
-	 * Avoid tlb shootdown in this case.
+	 * Tries to avoid tlb shootdown if !preemptible().  However, it
+	 * should be allowed under heavy memory pressure.
 	 */
+	if (zone && non_luf_pages_ok(zone))
+		return !(preemptible() && in_task());
+
 	return !(!irqs_disabled() && in_task());
 }
 
 /*
  * Can be called with zone lock released and irq enabled.
  */
-bool luf_takeoff_start(void)
+bool luf_takeoff_start(struct zone *zone)
 {
 	unsigned long flags;
-	bool no_shootdown = no_shootdown_context();
+	bool no_shootdown = no_shootdown_context(zone);
 
 	local_irq_save(flags);
 
@@ -2588,7 +2591,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 		 * luf_takeoff_{start,end}() is required for
 		 * get_page_from_free_area() to use luf_takeoff_check().
 		 */
-		luf_takeoff_start();
+		luf_takeoff_start(zone);
 		spin_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < NR_PAGE_ORDERS; order++) {
 			struct free_area *area = &(zone->free_area[order]);
@@ -2829,7 +2832,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	unsigned long flags;
 	int i;
 
-	luf_takeoff_start();
+	luf_takeoff_start(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype,
@@ -3455,7 +3458,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 
 	do {
 		page = NULL;
-		luf_takeoff_start();
+		luf_takeoff_start(zone);
 		spin_lock_irqsave(&zone->lock, flags);
 		if (alloc_flags & ALLOC_HIGHATOMIC)
 			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
@@ -3600,7 +3603,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct page *page;
 	unsigned long __maybe_unused UP_flags;
 
-	luf_takeoff_start();
+	luf_takeoff_start(NULL);
 	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
 	pcp_trylock_prepare(UP_flags);
 	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
@@ -5229,7 +5232,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
 	if (unlikely(!zone))
 		goto failed;
 
-	luf_takeoff_start();
+	luf_takeoff_start(NULL);
 	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
 	pcp_trylock_prepare(UP_flags);
 	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
@@ -7418,7 +7421,7 @@ unsigned long __offline_isolated_pages(unsigned long start_pfn,
 
 	offline_mem_sections(pfn, end_pfn);
 	zone = page_zone(pfn_to_page(pfn));
-	luf_takeoff_start();
+	luf_takeoff_start(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	while (pfn < end_pfn) {
 		page = pfn_to_page(pfn);
@@ -7536,7 +7539,7 @@ bool take_page_off_buddy(struct page *page)
 	unsigned int order;
 	bool ret = false;
 
-	luf_takeoff_start();
+	luf_takeoff_start(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < NR_PAGE_ORDERS; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index eae33d188762b..ccd36838f9cff 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -211,7 +211,7 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
 	struct page *buddy;
 
 	zone = page_zone(page);
-	luf_takeoff_start();
+	luf_takeoff_start(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	if (!is_migrate_isolate_page(page))
 		goto out;
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index b23d3ed34ec07..83b66e7f0d257 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -170,7 +170,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
 	if (free_area_empty(area, mt))
 		return err;
 
-	can_shootdown = luf_takeoff_start();
+	can_shootdown = luf_takeoff_start(zone);
 	spin_lock_irq(&zone->lock);
 
 	/*
@@ -250,7 +250,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
 		/* update budget to reflect call to report function */
 		budget--;
 
-		luf_takeoff_start();
+		luf_takeoff_start(zone);
 
 		/* reacquire zone lock and resume processing */
 		spin_lock_irq(&zone->lock);
-- 
2.17.1

