Message-ID: <CAJuCfpELB-LX55qZYBfrSVx1+EHE0TWLiHd2AUuU1BK-JTWntQ@mail.gmail.com>
Date: Wed, 15 Oct 2025 11:54:19 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: Vlastimil Babka <vbabka@...e.cz>
Cc: Andrew Morton <akpm@...ux-foundation.org>, Michal Hocko <mhocko@...e.com>, 
	Brendan Jackman <jackmanb@...gle.com>, Johannes Weiner <hannes@...xchg.org>, Zi Yan <ziy@...dia.com>, 
	Mel Gorman <mgorman@...hsingularity.net>, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org, Joshua Hahn <joshua.hahnjy@...il.com>
Subject: Re: [PATCH v2] mm/page_alloc: simplify and cleanup pcp locking

On Wed, Oct 15, 2025 at 10:50 AM Vlastimil Babka <vbabka@...e.cz> wrote:
>
> The pcp locking relies on pcp_spin_trylock(), which has to be used
> together with pcp_trylock_prepare()/pcp_trylock_finish() to work
> properly on !SMP && !RT configs. This is tedious and error-prone.
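
The tedium shows in the pattern every caller had to repeat. A minimal
sketch, assembled from the lines the diff below removes:

	unsigned long __maybe_unused UP_flags;

	/* All three calls must be paired correctly on !SMP && !RT. */
	pcp_trylock_prepare(UP_flags);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
	if (!pcp) {
		pcp_trylock_finish(UP_flags);
		return NULL;
	}
	/* ... use pcp ... */
	pcp_spin_unlock(pcp);
	pcp_trylock_finish(UP_flags);
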
>
> We can remove pcp_spin_lock() and the underlying pcpu_spin_lock()
> because they are unused. Afterwards, pcp_spin_unlock() is only used
> together with pcp_spin_trylock(), so we can add a UP_flags parameter
> to both and handle pcp_trylock_prepare()/finish() within them.
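
With the UP_flags parameter folded in, the same caller collapses to a
single pair of calls, as in the converted rmqueue_pcplist() below:

	unsigned long UP_flags;

	pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
	if (!pcp)
		return NULL;
	/* ... use pcp ... */
	pcp_spin_unlock(pcp, UP_flags);
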
>
> Additionally, for the configs where pcp_trylock_prepare()/finish() are
> no-ops (SMP || RT), make them pass &UP_flags to a no-op inline
> function. This ensures typechecking and marks the local variable as
> "used", so we can remove the __maybe_unused attributes.
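
The trick generalizes: passing the variable's address to an empty
static inline typechecks the argument and counts as a use, while the
call itself compiles away. A standalone sketch with hypothetical names:

	/* The empty inline is optimized out entirely, yet &(flags)
	 * rejects wrongly-typed arguments and marks flags as used. */
	static inline void consume_ulong(unsigned long *flags) { }
	#define touch_flags(flags)	consume_ulong(&(flags))

	void demo(void)
	{
		unsigned long flags;	/* no __maybe_unused needed */

		touch_flags(flags);	/* compiles to nothing */
	}
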
>
> In my compile testing, bloat-o-meter reported no change on an SMP
> config, so the compiler is able to optimize away the no-ops the same
> as before, and we have simplified the code that uses
> pcp_spin_trylock().
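
For reference, such a comparison is typically made with the in-tree
script on two builds (file names illustrative):

	$ ./scripts/bloat-o-meter vmlinux.before vmlinux.after
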
>
> Reviewed-by: Joshua Hahn <joshua.hahnjy@...il.com>
> Signed-off-by: Vlastimil Babka <vbabka@...e.cz>

You are fast :)

Reviewed-by: Suren Baghdasaryan <surenb@...gle.com>

> ---
> based on mm-new
> Changed my mind and did as Joshua suggested, for consistency. Thanks!
> ---
> Changes in v2:
> - Convert also pcp_trylock_finish() to noop function, per Joshua.
> - Link to v1: https://lore.kernel.org/r/20251015-b4-pcp-lock-cleanup-v1-1-878e0e7dcfb2@suse.cz
> ---
>  mm/page_alloc.c | 99 +++++++++++++++++++++++----------------------------------
>  1 file changed, 40 insertions(+), 59 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0155a66d7367..fb91c566327c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
>  /*
>   * On SMP, spin_trylock is sufficient protection.
>   * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
> + * Pass flags to a no-op inline function to typecheck and silence the unused
> + * variable warning.
>   */
> -#define pcp_trylock_prepare(flags)     do { } while (0)
> -#define pcp_trylock_finish(flag)       do { } while (0)
> +static inline void __pcp_trylock_noop(unsigned long *flags) { }
> +#define pcp_trylock_prepare(flags)     __pcp_trylock_noop(&(flags))
> +#define pcp_trylock_finish(flags)      __pcp_trylock_noop(&(flags))
>  #else
>
>  /* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
> @@ -129,15 +132,6 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
>   * Generic helper to lookup and a per-cpu variable with an embedded spinlock.
>   * Return value should be used with equivalent unlock helper.
>   */
> -#define pcpu_spin_lock(type, member, ptr)                              \
> -({                                                                     \
> -       type *_ret;                                                     \
> -       pcpu_task_pin();                                                \
> -       _ret = this_cpu_ptr(ptr);                                       \
> -       spin_lock(&_ret->member);                                       \
> -       _ret;                                                           \
> -})
> -
>  #define pcpu_spin_trylock(type, member, ptr)                           \
>  ({                                                                     \
>         type *_ret;                                                     \
> @@ -157,14 +151,21 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
>  })
>
>  /* struct per_cpu_pages specific helpers. */
> -#define pcp_spin_lock(ptr)                                             \
> -       pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
> -
> -#define pcp_spin_trylock(ptr)                                          \
> -       pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
> +#define pcp_spin_trylock(ptr, UP_flags)                                        \
> +({                                                                     \
> +       struct per_cpu_pages *__ret;                                    \
> +       pcp_trylock_prepare(UP_flags);                                  \
> +       __ret = pcpu_spin_trylock(struct per_cpu_pages, lock, ptr);     \
> +       if (!__ret)                                                     \
> +               pcp_trylock_finish(UP_flags);                           \
> +       __ret;                                                          \
> +})
>
> -#define pcp_spin_unlock(ptr)                                           \
> -       pcpu_spin_unlock(lock, ptr)
> +#define pcp_spin_unlock(ptr, UP_flags)                                 \
> +({                                                                     \
> +       pcpu_spin_unlock(lock, ptr);                                    \
> +       pcp_trylock_finish(UP_flags);                                   \
> +})
>
>  #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
>  DEFINE_PER_CPU(int, numa_node);
> @@ -2887,13 +2888,10 @@ static bool free_frozen_page_commit(struct zone *zone,
>                 if (to_free == 0 || pcp->count == 0)
>                         break;
>
> -               pcp_spin_unlock(pcp);
> -               pcp_trylock_finish(*UP_flags);
> +               pcp_spin_unlock(pcp, *UP_flags);
>
> -               pcp_trylock_prepare(*UP_flags);
> -               pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> +               pcp = pcp_spin_trylock(zone->per_cpu_pageset, *UP_flags);
>                 if (!pcp) {
> -                       pcp_trylock_finish(*UP_flags);
>                         ret = false;
>                         break;
>                 }
> @@ -2904,8 +2902,7 @@ static bool free_frozen_page_commit(struct zone *zone,
>                  * returned in an unlocked state.
>                  */
>                 if (smp_processor_id() != cpu) {
> -                       pcp_spin_unlock(pcp);
> -                       pcp_trylock_finish(*UP_flags);
> +                       pcp_spin_unlock(pcp, *UP_flags);
>                         ret = false;
>                         break;
>                 }
> @@ -2937,7 +2934,7 @@ static bool free_frozen_page_commit(struct zone *zone,
>  static void __free_frozen_pages(struct page *page, unsigned int order,
>                                 fpi_t fpi_flags)
>  {
> -       unsigned long __maybe_unused UP_flags;
> +       unsigned long UP_flags;
>         struct per_cpu_pages *pcp;
>         struct zone *zone;
>         unsigned long pfn = page_to_pfn(page);
> @@ -2973,17 +2970,15 @@ static void __free_frozen_pages(struct page *page, unsigned int order,
>                 add_page_to_zone_llist(zone, page, order);
>                 return;
>         }
> -       pcp_trylock_prepare(UP_flags);
> -       pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> +       pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
>         if (pcp) {
>                 if (!free_frozen_page_commit(zone, pcp, page, migratetype,
>                                                 order, fpi_flags, &UP_flags))
>                         return;
> -               pcp_spin_unlock(pcp);
> +               pcp_spin_unlock(pcp, UP_flags);
>         } else {
>                 free_one_page(zone, page, pfn, order, fpi_flags);
>         }
> -       pcp_trylock_finish(UP_flags);
>  }
>
>  void free_frozen_pages(struct page *page, unsigned int order)
> @@ -2996,7 +2991,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
>   */
>  void free_unref_folios(struct folio_batch *folios)
>  {
> -       unsigned long __maybe_unused UP_flags;
> +       unsigned long UP_flags;
>         struct per_cpu_pages *pcp = NULL;
>         struct zone *locked_zone = NULL;
>         int i, j;
> @@ -3039,8 +3034,7 @@ void free_unref_folios(struct folio_batch *folios)
>                 if (zone != locked_zone ||
>                     is_migrate_isolate(migratetype)) {
>                         if (pcp) {
> -                               pcp_spin_unlock(pcp);
> -                               pcp_trylock_finish(UP_flags);
> +                               pcp_spin_unlock(pcp, UP_flags);
>                                 locked_zone = NULL;
>                                 pcp = NULL;
>                         }
> @@ -3059,10 +3053,8 @@ void free_unref_folios(struct folio_batch *folios)
>                          * trylock is necessary as folios may be getting freed
>                          * from IRQ or SoftIRQ context after an IO completion.
>                          */
> -                       pcp_trylock_prepare(UP_flags);
> -                       pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> +                       pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
>                         if (unlikely(!pcp)) {
> -                               pcp_trylock_finish(UP_flags);
>                                 free_one_page(zone, &folio->page, pfn,
>                                               order, FPI_NONE);
>                                 continue;
> @@ -3085,10 +3077,8 @@ void free_unref_folios(struct folio_batch *folios)
>                 }
>         }
>
> -       if (pcp) {
> -               pcp_spin_unlock(pcp);
> -               pcp_trylock_finish(UP_flags);
> -       }
> +       if (pcp)
> +               pcp_spin_unlock(pcp, UP_flags);
>         folio_batch_reinit(folios);
>  }
>
> @@ -3339,15 +3329,12 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
>         struct per_cpu_pages *pcp;
>         struct list_head *list;
>         struct page *page;
> -       unsigned long __maybe_unused UP_flags;
> +       unsigned long UP_flags;
>
>         /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
> -       pcp_trylock_prepare(UP_flags);
> -       pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> -       if (!pcp) {
> -               pcp_trylock_finish(UP_flags);
> +       pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
> +       if (!pcp)
>                 return NULL;
> -       }
>
>         /*
>          * On allocation, reduce the number of pages that are batch freed.
> @@ -3357,8 +3344,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
>         pcp->free_count >>= 1;
>         list = &pcp->lists[order_to_pindex(migratetype, order)];
>         page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
> -       pcp_spin_unlock(pcp);
> -       pcp_trylock_finish(UP_flags);
> +       pcp_spin_unlock(pcp, UP_flags);
>         if (page) {
>                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
>                 zone_statistics(preferred_zone, zone, 1);
> @@ -5045,7 +5031,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>                         struct page **page_array)
>  {
>         struct page *page;
> -       unsigned long __maybe_unused UP_flags;
> +       unsigned long UP_flags;
>         struct zone *zone;
>         struct zoneref *z;
>         struct per_cpu_pages *pcp;
> @@ -5139,10 +5125,9 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>                 goto failed;
>
>         /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
> -       pcp_trylock_prepare(UP_flags);
> -       pcp = pcp_spin_trylock(zone->per_cpu_pageset);
> +       pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
>         if (!pcp)
> -               goto failed_irq;
> +               goto failed;
>
>         /* Attempt the batch allocation */
>         pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
> @@ -5159,8 +5144,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>                 if (unlikely(!page)) {
>                         /* Try and allocate at least one page */
>                         if (!nr_account) {
> -                               pcp_spin_unlock(pcp);
> -                               goto failed_irq;
> +                               pcp_spin_unlock(pcp, UP_flags);
> +                               goto failed;
>                         }
>                         break;
>                 }
> @@ -5171,8 +5156,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>                 page_array[nr_populated++] = page;
>         }
>
> -       pcp_spin_unlock(pcp);
> -       pcp_trylock_finish(UP_flags);
> +       pcp_spin_unlock(pcp, UP_flags);
>
>         __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
>         zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
> @@ -5180,9 +5164,6 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  out:
>         return nr_populated;
>
> -failed_irq:
> -       pcp_trylock_finish(UP_flags);
> -
>  failed:
>         page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
>         if (page)
>
> ---
> base-commit: 550b531346a7e4e7ad31813d0d1d6a6d8c10a06f
> change-id: 20251015-b4-pcp-lock-cleanup-9b70b417a20e
>
> Best regards,
> --
> Vlastimil Babka <vbabka@...e.cz>
>
