[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJuCfpELB-LX55qZYBfrSVx1+EHE0TWLiHd2AUuU1BK-JTWntQ@mail.gmail.com>
Date: Wed, 15 Oct 2025 11:54:19 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: Vlastimil Babka <vbabka@...e.cz>
Cc: Andrew Morton <akpm@...ux-foundation.org>, Michal Hocko <mhocko@...e.com>,
Brendan Jackman <jackmanb@...gle.com>, Johannes Weiner <hannes@...xchg.org>, Zi Yan <ziy@...dia.com>,
Mel Gorman <mgorman@...hsingularity.net>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, Joshua Hahn <joshua.hahnjy@...il.com>
Subject: Re: [PATCH v2] mm/page_alloc: simplify and cleanup pcp locking
On Wed, Oct 15, 2025 at 10:50 AM Vlastimil Babka <vbabka@...e.cz> wrote:
>
> The pcp locking relies on pcp_spin_trylock() which has to be used
> together with pcp_trylock_prepare()/pcp_trylock_finish() to work
> properly on !SMP !RT configs. This is tedious and error-prone.
>
> We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
> don't use it. Afterwards pcp_spin_unlock() is only used together with
> pcp_spin_trylock(). Therefore we can add the UP_flags parameter to them
> both and handle pcp_trylock_prepare()/finish() within.
>
> Additionally for the configs where pcp_trylock_prepare()/finish() are
> no-op (SMP || RT) make them pass &UP_flags to a no-op inline function.
> This ensures typechecking and makes the local variable "used" so we can
> remove the __maybe_unused attributes.
>
> In my compile testing, bloat-o-meter reported no change on SMP config,
> so the compiler is capable of optimizing away the no-ops same as before,
> and we have simplified the code using pcp_spin_trylock().
>
> Reviewed-by: Joshua Hahn <joshua.hahnjy@...il.com>
> Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
You are fast :)
Reviewed-by: Suren Baghdasaryan <surenb@...gle.com>
> ---
> based on mm-new
> Changed my mind and did as Joshua suggested, for consistency. Thanks!
> ---
> Changes in v2:
> - Convert also pcp_trylock_finish() to noop function, per Joshua.
> - Link to v1: https://lore.kernel.org/r/20251015-b4-pcp-lock-cleanup-v1-1-878e0e7dcfb2@suse.cz
> ---
> mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
> 1 file changed, 40 insertions(+), 59 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0155a66d7367..fb91c566327c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
> /*
> * On SMP, spin_trylock is sufficient protection.
> * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
> + * Pass flags to a no-op inline function to typecheck and silence the unused
> + * variable warning.
> */
> -#define pcp_trylock_prepare(flags) do { } while (0)
> -#define pcp_trylock_finish(flag) do { } while (0)
> +static inline void __pcp_trylock_noop(unsigned long *flags) { }
> +#define pcp_trylock_prepare(flags) __pcp_trylock_noop(&(flags))
> +#define pcp_trylock_finish(flags) __pcp_trylock_noop(&(flags))
> #else
>
> /* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
> @@ -129,15 +132,6 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
> * Generic helper to lookup and a per-cpu variable with an embedded spinlock.
> * Return value should be used with equivalent unlock helper.
> */
> -#define pcpu_spin_lock(type, member, ptr) \
> -({ \
> - type *_ret; \
> - pcpu_task_pin(); \
> - _ret = this_cpu_ptr(ptr); \
> - spin_lock(&_ret->member); \
> - _ret; \
> -})
> -
> #define pcpu_spin_trylock(type, member, ptr) \
> ({ \
> type *_ret; \
> @@ -157,14 +151,21 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
> })
>
> /* struct per_cpu_pages specific helpers. */
> -#define pcp_spin_lock(ptr) \
> - pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
> -
> -#define pcp_spin_trylock(ptr) \
> - pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
> +#define pcp_spin_trylock(ptr, UP_flags) \
> +({ \
> + struct per_cpu_pages *__ret; \
> + pcp_trylock_prepare(UP_flags); \
> + __ret = pcpu_spin_trylock(struct per_cpu_pages, lock, ptr); \
> + if (!__ret) \
> + pcp_trylock_finish(UP_flags); \
> + __ret; \
> +})
>
> -#define pcp_spin_unlock(ptr) \
> - pcpu_spin_unlock(lock, ptr)
> +#define pcp_spin_unlock(ptr, UP_flags) \
> +({ \
> + pcpu_spin_unlock(lock, ptr); \
> + pcp_trylock_finish(UP_flags); \
> +})
>
> #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
> DEFINE_PER_CPU(int, numa_node);
> @@ -2887,13 +2888,10 @@ static bool free_frozen_page_commit(struct zone *zone,
> if (to_free == 0 || pcp->count == 0)
> break;
>
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(*UP_flags);
> + pcp_spin_unlock(pcp, *UP_flags);
>
> - pcp_trylock_prepare(*UP_flags);
> - pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> + pcp = pcp_spin_trylock(zone->per_cpu_pageset, *UP_flags);
> if (!pcp) {
> - pcp_trylock_finish(*UP_flags);
> ret = false;
> break;
> }
> @@ -2904,8 +2902,7 @@ static bool free_frozen_page_commit(struct zone *zone,
> * returned in an unlocked state.
> */
> if (smp_processor_id() != cpu) {
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(*UP_flags);
> + pcp_spin_unlock(pcp, *UP_flags);
> ret = false;
> break;
> }
> @@ -2937,7 +2934,7 @@ static bool free_frozen_page_commit(struct zone *zone,
> static void __free_frozen_pages(struct page *page, unsigned int order,
> fpi_t fpi_flags)
> {
> - unsigned long __maybe_unused UP_flags;
> + unsigned long UP_flags;
> struct per_cpu_pages *pcp;
> struct zone *zone;
> unsigned long pfn = page_to_pfn(page);
> @@ -2973,17 +2970,15 @@ static void __free_frozen_pages(struct page *page, unsigned int order,
> add_page_to_zone_llist(zone, page, order);
> return;
> }
> - pcp_trylock_prepare(UP_flags);
> - pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> + pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
> if (pcp) {
> if (!free_frozen_page_commit(zone, pcp, page, migratetype,
> order, fpi_flags, &UP_flags))
> return;
> - pcp_spin_unlock(pcp);
> + pcp_spin_unlock(pcp, UP_flags);
> } else {
> free_one_page(zone, page, pfn, order, fpi_flags);
> }
> - pcp_trylock_finish(UP_flags);
> }
>
> void free_frozen_pages(struct page *page, unsigned int order)
> @@ -2996,7 +2991,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
> */
> void free_unref_folios(struct folio_batch *folios)
> {
> - unsigned long __maybe_unused UP_flags;
> + unsigned long UP_flags;
> struct per_cpu_pages *pcp = NULL;
> struct zone *locked_zone = NULL;
> int i, j;
> @@ -3039,8 +3034,7 @@ void free_unref_folios(struct folio_batch *folios)
> if (zone != locked_zone ||
> is_migrate_isolate(migratetype)) {
> if (pcp) {
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(UP_flags);
> + pcp_spin_unlock(pcp, UP_flags);
> locked_zone = NULL;
> pcp = NULL;
> }
> @@ -3059,10 +3053,8 @@ void free_unref_folios(struct folio_batch *folios)
> * trylock is necessary as folios may be getting freed
> * from IRQ or SoftIRQ context after an IO completion.
> */
> - pcp_trylock_prepare(UP_flags);
> - pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> + pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
> if (unlikely(!pcp)) {
> - pcp_trylock_finish(UP_flags);
> free_one_page(zone, &folio->page, pfn,
> order, FPI_NONE);
> continue;
> @@ -3085,10 +3077,8 @@ void free_unref_folios(struct folio_batch *folios)
> }
> }
>
> - if (pcp) {
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(UP_flags);
> - }
> + if (pcp)
> + pcp_spin_unlock(pcp, UP_flags);
> folio_batch_reinit(folios);
> }
>
> @@ -3339,15 +3329,12 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
> struct per_cpu_pages *pcp;
> struct list_head *list;
> struct page *page;
> - unsigned long __maybe_unused UP_flags;
> + unsigned long UP_flags;
>
> /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
> - pcp_trylock_prepare(UP_flags);
> - pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> - if (!pcp) {
> - pcp_trylock_finish(UP_flags);
> + pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
> + if (!pcp)
> return NULL;
> - }
>
> /*
> * On allocation, reduce the number of pages that are batch freed.
> @@ -3357,8 +3344,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
> pcp->free_count >>= 1;
> list = &pcp->lists[order_to_pindex(migratetype, order)];
> page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(UP_flags);
> + pcp_spin_unlock(pcp, UP_flags);
> if (page) {
> __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
> zone_statistics(preferred_zone, zone, 1);
> @@ -5045,7 +5031,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> struct page **page_array)
> {
> struct page *page;
> - unsigned long __maybe_unused UP_flags;
> + unsigned long UP_flags;
> struct zone *zone;
> struct zoneref *z;
> struct per_cpu_pages *pcp;
> @@ -5139,10 +5125,9 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> goto failed;
>
> /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
> - pcp_trylock_prepare(UP_flags);
> - pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> + pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
> if (!pcp)
> - goto failed_irq;
> + goto failed;
>
> /* Attempt the batch allocation */
> pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
> @@ -5159,8 +5144,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> if (unlikely(!page)) {
> /* Try and allocate at least one page */
> if (!nr_account) {
> - pcp_spin_unlock(pcp);
> - goto failed_irq;
> + pcp_spin_unlock(pcp, UP_flags);
> + goto failed;
> }
> break;
> }
> @@ -5171,8 +5156,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> page_array[nr_populated++] = page;
> }
>
> - pcp_spin_unlock(pcp);
> - pcp_trylock_finish(UP_flags);
> + pcp_spin_unlock(pcp, UP_flags);
>
> __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
> zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
> @@ -5180,9 +5164,6 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> out:
> return nr_populated;
>
> -failed_irq:
> - pcp_trylock_finish(UP_flags);
> -
> failed:
> page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
> if (page)
>
> ---
> base-commit: 550b531346a7e4e7ad31813d0d1d6a6d8c10a06f
> change-id: 20251015-b4-pcp-lock-cleanup-9b70b417a20e
>
> Best regards,
> --
> Vlastimil Babka <vbabka@...e.cz>
>
Powered by blists - more mailing lists