Message-ID: <20250924-b4-asi-page-alloc-v1-19-2d861768041f@google.com>
Date: Wed, 24 Sep 2025 14:59:54 +0000
From: Brendan Jackman <jackmanb@...gle.com>
To: jackmanb@...gle.com, Andy Lutomirski <luto@...nel.org>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>, "Liam R. Howlett" <Liam.Howlett@...cle.com>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Johannes Weiner <hannes@...xchg.org>, Zi Yan <ziy@...dia.com>,
Axel Rasmussen <axelrasmussen@...gle.com>, Yuanchu Xie <yuanchu@...gle.com>,
Roman Gushchin <roman.gushchin@...ux.dev>
Cc: peterz@...radead.org, bp@...en8.de, dave.hansen@...ux.intel.com,
mingo@...hat.com, tglx@...utronix.de, akpm@...ux-foundation.org,
david@...hat.com, derkling@...gle.com, junaids@...gle.com,
linux-kernel@...r.kernel.org, linux-mm@...ck.org, reijiw@...gle.com,
rientjes@...gle.com, rppt@...nel.org, vbabka@...e.cz, x86@...nel.org,
yosry.ahmed@...ux.dev
Subject: [PATCH 19/21] mm/asi: bad_page() when ASI mappings are wrong
Add bad_page() checks that fire when the page allocator thinks a page is
mapped/unmapped in the ASI restricted address space but the page tables
disagree.
This requires adding an accessor in set_memory.c that walks the page
tables and reports the mapping state.
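
For illustration, a simplified sketch of what such an accessor boils down
to on x86. This is not the implementation below (which walks
asi_nonsensitive_pgd and special-cases 2M mappings); lookup_address() is
the existing walker in arch/x86/mm/pat/set_memory.c and stands in for the
real lookup here, and the _sketch name is made up:

	/*
	 * Sketch only: a page counts as "sensitive" iff it is absent
	 * from the restricted direct map.
	 */
	static bool direct_map_sensitive_sketch(struct page *page)
	{
		unsigned long addr = (unsigned long)page_address(page);
		unsigned int level;
		pte_t *pte = lookup_address(addr, &level);

		return !pte || !(pte_flags(*pte) & _PAGE_PRESENT);
	}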
The check is implemented on the assumption that mappings are managed at
pageblock granularity, so it only needs to run once per pageblock rather
than once per order-0 page. Because of this order-awareness it can't go
into free_page_is_bad() and has to be integrated separately into
free_pages_prepare(). The alloc side is easier: there it just goes into
check_new_pages().
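
Roughly, the check walks the allocation one pageblock at a time and
compares the expected sensitivity (from the pageblock freetype on the
free path, from the requested freetype / __GFP_SENSITIVE on the alloc
path) against what the page tables report. A sketch of that loop, with
the page_alloc.c hunk below being authoritative:

	for (p = page; p < page + (1 << order); p += pageblock_nr_pages)
		if (direct_map_sensitive(p) != sensitive)
			return true;	/* reported via bad_page() */
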
Signed-off-by: Brendan Jackman <jackmanb@...gle.com>
---
arch/x86/include/asm/set_memory.h | 3 +++
arch/x86/mm/pat/set_memory.c | 31 +++++++++++++++++++++++++++
include/linux/set_memory.h | 2 ++
mm/page_alloc.c | 45 ++++++++++++++++++++++++++++++++++-----
4 files changed, 76 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 396580693e7d1317537148c0c219296e2b7c13fd..3870fa8cf51c0ece0dedf4d7876c4d14111deffd 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -94,12 +94,15 @@ bool kernel_page_present(struct page *page);
#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
int set_direct_map_sensitive(struct page *page, int num_pageblocks, bool sensitive);
+bool direct_map_sensitive(struct page *page);
#else /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
static inline
int set_direct_map_sensitive(struct page *page, int num_pageblocks, bool sensitive)
{
return 0;
}
+
+static inline bool direct_map_sensitive(struct page *page) { return false; }
#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
extern int kernel_set_to_readonly;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 88fb65574d4fa0089fa31a9a06fe096c408991e6..d4c3219374f889f9a60c459f0559e5ffb472073d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2721,6 +2721,37 @@ int set_direct_map_sensitive(struct page *page, int num_pageblocks, bool sensiti
return __change_page_attr_set_clr(&cpa, 1);
}
+
+/*
+ * Walk the pagetable to check if the page is mapped into all ASI restricted
+ * address spaces.
+ */
+bool direct_map_sensitive(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+ pgd_t *pgd = pgd_offset_pgd(asi_nonsensitive_pgd, addr);
+ unsigned int level;
+ bool nx, rw;
+ pte_t *pte = lookup_address_in_pgd_attr(pgd, addr, &level, &nx, &rw);
+
+ switch (level) {
+ case PG_LEVEL_4K:
+ /*
+ * lookup_address_in_pgd_attr() still returns the PTE for
+ * non-present 4K pages.
+ */
+ return !pte_present(*pte);
+ case PG_LEVEL_2M:
+ /*
+ * pmd_present() checks PSE to deal with some hugetlb
+ * logic. That's not relevant for the direct map so just
+ * explicitly check the real P bit.
+ */
+ return !(pmd_flags(*(pmd_t *)pte) & _PAGE_PRESENT);
+ default:
+ return !pte;
+ }
+}
#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index db4225c046c47c114293af8b504886b103dc94ce..6f42d6a35feceeae4623c2da50cfac54e3533228 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -50,6 +50,8 @@ static inline int set_direct_map_sensitive(struct page *page,
return 0;
}
+static inline bool direct_map_sensitive(struct page *page) { return false; }
+
#else /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
/*
* Some architectures, e.g. ARM64 can disable direct map modifications at
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a8e3556643b0ff2fe1d35a678937270356006d34..68bc3cc5ed7e7f1adb8dda90edc2e001f9a1c3c5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -15,6 +15,7 @@
* (lots of bits borrowed from Ingo Molnar & Andrew Morton)
*/
+#include <linux/asi.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
@@ -1161,6 +1162,33 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
return bad_reason;
}
+static bool page_asi_mapping_bad(struct page *page, unsigned int order, bool sensitive)
+{
+#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
+ if (asi_enabled_static()) {
+ struct page *block_page = page;
+
+ /*
+ * ASI mappings are at pageblock granularity. Check they match
+ * the requested sensitivity.
+ */
+ while (block_page < page + (1 << order)) {
+ if (direct_map_sensitive(block_page) != sensitive) {
+ bad_page(page,
+ sensitive ?
+ "page unexpectedly nonsensitive" :
+ "page unexpectedly sensitive");
+ return true;
+ }
+
+ block_page += pageblock_nr_pages;
+ }
+ }
+#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
+
+ return false;
+}
+
static inline bool free_page_is_bad(struct page *page)
{
if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
@@ -1471,8 +1499,14 @@ __always_inline bool free_pages_prepare(struct page *page,
page->page_type = UINT_MAX;
if (is_check_pages_enabled()) {
+ freetype_t ft = get_pageblock_freetype(page);
+
if (free_page_is_bad(page))
bad++;
+
+ if (!bad)
+ bad += page_asi_mapping_bad(page, order,
+ freetype_sensitive(ft));
if (bad)
return false;
}
@@ -1840,7 +1874,8 @@ static bool check_new_page(struct page *page)
return true;
}
-static inline bool check_new_pages(struct page *page, unsigned int order)
+static inline bool check_new_pages(struct page *page, unsigned int order,
+ bool sensitive)
{
if (!is_check_pages_enabled())
return false;
@@ -1852,7 +1887,7 @@ static inline bool check_new_pages(struct page *page, unsigned int order)
return true;
}
- return false;
+ return page_asi_mapping_bad(page, order, sensitive);
}
static inline bool should_skip_kasan_unpoison(gfp_t flags)
@@ -3393,7 +3428,7 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
if (!page)
return NULL;
- } while (check_new_pages(page, order));
+ } while (check_new_pages(page, order, freetype_sensitive(freetype)));
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone, 1);
@@ -3478,7 +3513,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
page = list_first_entry(list, struct page, pcp_list);
list_del(&page->pcp_list);
pcp->count -= 1 << order;
- } while (check_new_pages(page, order));
+ } while (check_new_pages(page, order, freetype_sensitive(freetype)));
return page;
}
@@ -7231,7 +7266,7 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
} else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
struct page *head = pfn_to_page(start);
- check_new_pages(head, order);
+ check_new_pages(head, order, gfp_mask & __GFP_SENSITIVE);
prep_new_page(head, order, gfp_mask, 0);
set_page_refcounted(head);
} else {
--
2.50.1