Message-ID: <20250313-asi-page-alloc-v1-11-04972e046cea@google.com>
Date: Thu, 13 Mar 2025 18:11:30 +0000
From: Brendan Jackman <jackmanb@...gle.com>
To: Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
Andrew Morton <akpm@...ux-foundation.org>, David Rientjes <rientjes@...gle.com>,
Vlastimil Babka <vbabka@...e.cz>, David Hildenbrand <david@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org,
Mike Rapoport <rppt@...nel.org>, Junaid Shahid <junaids@...gle.com>, Reiji Watanabe <reijiw@...gle.com>,
Patrick Bellasi <derkling@...gle.com>, Brendan Jackman <jackmanb@...gle.com>,
Yosry Ahmed <yosry.ahmed@...ux.dev>
Subject: [PATCH RFC 11/11] mm/page_alloc: Add support for ASI-unmapping pages

While calling asi_map() is pretty easy, unmapping pages requires ensuring
that a TLB shootdown is complete before we allow them to be allocated.
Therefore, treat this as a special case of buddy allocation. Allocate an
entire block, release the zone lock and enable interrupts, then do the
unmap and TLB shootdown. Once that's complete, return any unwanted pages
within the block to the freelists.

Signed-off-by: Brendan Jackman <jackmanb@...gle.com>
---
mm/internal.h | 5 ++++
mm/page_alloc.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 86 insertions(+), 7 deletions(-)
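
For reviewers, a minimal sketch of the intended caller side (not part of the
diff): it assumes only the __GFP_SENSITIVE flag this patch already checks in
gfp_to_alloc_flags_cma(), and the helper name alloc_sensitive_page() is
invented purely for illustration.

	#include <linux/gfp.h>
	#include <linux/mm_types.h>

	/*
	 * Illustrative only. A blocking allocation marked __GFP_SENSITIVE gets
	 * ALLOC_ASI_UNMAP, so when the normal freelists can't satisfy it,
	 * rmqueue_buddy() may fall back to __rmqueue_asi_unmap(): take at
	 * least a whole nonsensitive pageblock, asi_unmap() it (TLB shootdown
	 * with IRQs on), then return the unneeded remainder to the freelists.
	 */
	static struct page *alloc_sensitive_page(void)
	{
		/* GFP_KERNEL may block, so the unmap fallback is permitted. */
		return alloc_pages(GFP_KERNEL | __GFP_SENSITIVE, 0);
	}
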
diff --git a/mm/internal.h b/mm/internal.h
index b82ab345fb994b7c4971e550556e24bb68f315f6..7904be86fa2c7fded62c100d84fe572c15407ccf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1189,6 +1189,11 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #endif
 #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
 #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
+#define ALLOC_ASI_UNMAP	0x1000 /* allow asi_unmap(), requiring TLB shootdown. */
+#else
+#define ALLOC_ASI_UNMAP	0x0
+#endif
 
 /* Flags that allow allocations below the min watermark. */
 #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0d8bbad8675c99282f308c4a4122d5d9c4b14dae..9ac883d7a71387d291bc04bad675e2545dd7ba0f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1627,6 +1627,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
  * The other migratetypes do not have fallbacks.
  *
  * Note there are no fallbacks from sensitive to nonsensitive migratetypes.
+ * That's instead handled as a totally special case in __rmqueue_asi_unmap().
  */
 #ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
 #define TERMINATE_FALLBACK -1
@@ -2790,7 +2791,77 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
 #endif
 }
 
-static __always_inline
+#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
+/*
+ * Allocate a page by converting some memory to sensitive, by doing an ASI
+ * unmap. This can't be done via __rmqueue_fallback because that unmap requires
+ * a TLB flush which can only be done with interrupts on.
+ */
+static inline
+struct page *__rmqueue_asi_unmap(struct zone *zone, unsigned int request_order,
+				 unsigned int alloc_flags, int migratetype)
+{
+	struct page *page;
+	int alloc_order;
+	int i;
+
+	lockdep_assert_irqs_enabled();
+
+	if (!(alloc_flags & ALLOC_ASI_UNMAP))
+		return NULL;
+
+	VM_WARN_ON_ONCE(migratetype == MIGRATE_UNMOVABLE_NONSENSITIVE);
+
+	/*
+	 * Need to unmap a whole pageblock (otherwise it might require
+	 * allocating pagetables). Can't do that with the zone lock held, but we
+	 * also can't flip the block's migratetype until the flush is complete,
+	 * otherwise any concurrent sensitive allocations could momentarily leak
+	 * data into the restricted address space. As a simple workaround,
+	 * "allocate" at least the whole block, unmap it (with IRQs enabled),
+	 * then free any remainder of the block again.
+	 *
+	 * An alternative to this could be to synchronize an intermediate state
+	 * on the pageblock (since this code can't be called in IRQ context,
+	 * this shouldn't be too bad - it's likely OK to just busy-wait until
+	 * any concurrent TLB flush completes).
+	 */
+
+	alloc_order = max(request_order, pageblock_order);
+	spin_lock_irq(&zone->lock);
+	page = __rmqueue_smallest(zone, alloc_order, MIGRATE_UNMOVABLE_NONSENSITIVE);
+	spin_unlock_irq(&zone->lock);
+	if (!page)
+		return NULL;
+
+	asi_unmap(page, 1 << alloc_order);
+
+	change_pageblock_range(page, alloc_order, migratetype);
+
+	if (request_order >= alloc_order)
+		return page;
+
+	spin_lock_irq(&zone->lock);
+	for (i = request_order; i < alloc_order; i++) {
+		struct page *page_to_free = page + (1 << i);
+
+		__free_one_page(page_to_free, page_to_pfn(page_to_free), zone, i,
+				migratetype, FPI_SKIP_REPORT_NOTIFY);
+	}
+	spin_unlock_irq(&zone->lock);
+
+	return page;
+}
+#else
+static inline
+struct page *__rmqueue_asi_unmap(struct zone *zone, unsigned int order,
+				 unsigned int alloc_flags, int migratetype)
+{
+	return NULL;
+}
+#endif
+
+static noinline
 struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 			   unsigned int order, unsigned int alloc_flags,
 			   int migratetype)
@@ -2814,13 +2885,14 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 			 */
 			if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
 				page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-
-			if (!page) {
-				spin_unlock_irqrestore(&zone->lock, flags);
-				return NULL;
-			}
 		}
 		spin_unlock_irqrestore(&zone->lock, flags);
+
+		if (!page)
+			page = __rmqueue_asi_unmap(zone, order, alloc_flags, migratetype);
+
+		if (!page)
+			return NULL;
 	} while (check_new_pages(page, order));
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -3356,6 +3428,8 @@ static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
 	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
+	if (gfp_mask & __GFP_SENSITIVE && gfpflags_allow_blocking(gfp_mask))
+		alloc_flags |= ALLOC_ASI_UNMAP;
 	return alloc_flags;
 }
 
@@ -4382,7 +4456,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
 	if (reserve_flags)
 		alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
-			      (alloc_flags & ALLOC_KSWAPD);
+			      (alloc_flags & (ALLOC_KSWAPD | ALLOC_ASI_UNMAP));
 
 	/*
 	 * Reset the nodemask and zonelist iterators if memory policies can be
--
2.49.0.rc1.451.g8f38331e32-goog