[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250506133207.1009676-1-kirill.shutemov@linux.intel.com>
Date: Tue, 6 May 2025 16:32:07 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: bp@...en8.de
Cc: akpm@...ux-foundation.org,
ast@...nel.org,
david@...hat.com,
hannes@...xchg.org,
jackmanb@...gle.com,
kirill.shutemov@...ux.intel.com,
linux-coco@...ts.linux.dev,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
mhocko@...e.com,
stable@...r.kernel.org,
surenb@...gle.com,
tglx@...utronix.de,
vbabka@...e.cz
Subject: [PATCHv2] mm/page_alloc: Fix race condition in unaccepted memory handling
The page allocator tracks the number of zones that have unaccepted
memory using static_branch_enc/dec() and uses that static branch in hot
paths to determine if it needs to deal with unaccepted memory.
Borislav and Thomas pointed out that the tracking is racy: operations on
static_branch are not serialized against adding/removing unaccepted pages
to/from the zone.
Sanity checks inside static_branch machinery detects it:
WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0
The comment around the WARN() explains the problem:
/*
* Warn about the '-1' case though; since that means a
* decrement is concurrent with a first (0->1) increment. IOW
* people are trying to disable something that wasn't yet fully
* enabled. This suggests an ordering problem on the user side.
*/
The effect of this static_branch optimization is only visible on
microbenchmark.
Instead of adding more complexity around it, remove it altogether.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.local
Reported-by: Borislav Petkov <bp@...en8.de>
Tested-by: Borislav Petkov (AMD) <bp@...en8.de>
Reported-by: Thomas Gleixner <tglx@...utronix.de>
Cc: stable@...r.kernel.org # v6.5+
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Vlastimil Babka <vbabka@...e.cz>
Cc: Suren Baghdasaryan <surenb@...gle.com>
Cc: Michal Hocko <mhocko@...e.com>
Cc: Brendan Jackman <jackmanb@...gle.com>
Cc: Johannes Weiner <hannes@...xchg.org>
---
v2:
- Update commit message;
- Apply Borislav's Tested-by tag;
---
mm/internal.h | 1 -
mm/mm_init.c | 1 -
mm/page_alloc.c | 47 -----------------------------------------------
3 files changed, 49 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index e9695baa5922..50c2f590b2d0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1595,7 +1595,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY
void accept_page(struct page *page);
-void unaccepted_cleanup_work(struct work_struct *work);
#else /* CONFIG_UNACCEPTED_MEMORY */
static inline void accept_page(struct page *page)
{
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 9659689b8ace..84f14fa12d0d 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY
INIT_LIST_HEAD(&zone->unaccepted_pages);
- INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
#endif
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5fccf5fce084..a4a4df2daedb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7175,16 +7175,8 @@ bool has_managed_dma(void)
#ifdef CONFIG_UNACCEPTED_MEMORY
-/* Counts number of zones with unaccepted pages. */
-static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
-
static bool lazy_accept = true;
-void unaccepted_cleanup_work(struct work_struct *work)
-{
- static_branch_dec(&zones_with_unaccepted_pages);
-}
-
static int __init accept_memory_parse(char *p)
{
if (!strcmp(p, "lazy")) {
@@ -7209,11 +7201,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
static void __accept_page(struct zone *zone, unsigned long *flags,
struct page *page)
{
- bool last;
-
list_del(&page->lru);
- last = list_empty(&zone->unaccepted_pages);
-
account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
__ClearPageUnaccepted(page);
@@ -7222,28 +7210,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
- if (last) {
- /*
- * There are two corner cases:
- *
- * - If allocation occurs during the CPU bring up,
- * static_branch_dec() cannot be used directly as
- * it causes a deadlock on cpu_hotplug_lock.
- *
- * Instead, use schedule_work() to prevent deadlock.
- *
- * - If allocation occurs before workqueues are initialized,
- * static_branch_dec() should be called directly.
- *
- * Workqueues are initialized before CPU bring up, so this
- * will not conflict with the first scenario.
- */
- if (system_wq)
- schedule_work(&zone->unaccepted_cleanup);
- else
- unaccepted_cleanup_work(&zone->unaccepted_cleanup);
- }
}
void accept_page(struct page *page)
@@ -7280,20 +7246,12 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static inline bool has_unaccepted_memory(void)
-{
- return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
static bool cond_accept_memory(struct zone *zone, unsigned int order,
int alloc_flags)
{
long to_accept, wmark;
bool ret = false;
- if (!has_unaccepted_memory())
- return false;
-
if (list_empty(&zone->unaccepted_pages))
return false;
@@ -7331,22 +7289,17 @@ static bool __free_unaccepted(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long flags;
- bool first = false;
if (!lazy_accept)
return false;
spin_lock_irqsave(&zone->lock, flags);
- first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
__SetPageUnaccepted(page);
spin_unlock_irqrestore(&zone->lock, flags);
- if (first)
- static_branch_inc(&zones_with_unaccepted_pages);
-
return true;
}
--
2.47.2
Powered by blists - more mailing lists