Message-ID: <78e646af-e8b5-4596-8fbf-17b139cfdddd@redhat.com>
Date: Wed, 22 May 2024 16:09:41 +0200
From: David Hildenbrand <david@...hat.com>
To: Brendan Jackman <jackmanb@...gle.com>, Oscar Salvador
<osalvador@...e.de>, Andrew Morton <akpm@...ux-foundation.org>,
Mike Rapoport <rppt@...nel.org>
Cc: Michal Hocko <mhocko@...e.com>,
Anshuman Khandual <anshuman.khandual@....com>,
Vlastimil Babka <vbabka@...e.cz>, Pavel Tatashin
<pasha.tatashin@...een.com>, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] mm,memory_hotplug: Remove un-taken lock
On 21.05.24 14:57, Brendan Jackman wrote:
> It seems that [1] was acked, and a v2 was written[2] which improved
> upon it, but got bogged down in discussion of other topics, so the
> improvements were not included. Then [1] got merged as commit
> 27cacaad16c5 ("mm,memory_hotplug: drop unneeded locking") and we ended
> up with locks that get taken for read but never for write.
>
> So, let's remove the read locking.
>
> Compared to Oscar's original v2[2], I have added a READ_ONCE in
> page_outside_zone_boundaries; this is a substitute for the compiler
> barrier that was implied by read_seqretry(). I believe this is necessary
> to insure against UB, although the value being read here is only used
> for a printk so the stakes seem very low (and this is all debug code
> anyway). I believe a compiler barrier is also needed in zone_spans_pfn,
> but I'll address that in a separate patch.
>
> That read_seqretry() also implied a CPU-level memory barrier, which I
> don't think needs replacing: page_outside_zone_boundaries() is used in
> the alloc and free paths, but you can't allocate or free pages from
> the span that is in the middle of being added/removed by hotplug.
>
> In other words, page_outside_zone_boundaries() doesn't require a
> strictly up-to-date view of spanned_pages, but I think it does require
> a value that was once/will eventually be correct, hence READ_ONCE.
>
> [1] https://lore.kernel.org/all/20210531093958.15021-1-osalvador@suse.de/T/#u
> [2] https://lore.kernel.org/linux-mm/20210602091457.17772-3-osalvador@suse.de/#t
>
> Cc: David Hildenbrand <david@...hat.com>
> Cc: Michal Hocko <mhocko@...e.com>
> Cc: Anshuman Khandual <anshuman.khandual@....com>
> Cc: Vlastimil Babka <vbabka@...e.cz>
> Cc: Pavel Tatashin <pasha.tatashin@...een.com>
> Co-developed-by: Oscar Salvador <osalvador@...e.de>
> Signed-off-by: Oscar Salvador <osalvador@...e.de>
> Signed-off-by: Brendan Jackman <jackmanb@...gle.com>
> ---
> include/linux/memory_hotplug.h | 35 -----------------------------------
> include/linux/mmzone.h | 23 +++++------------------
> mm/mm_init.c | 1 -
> mm/page_alloc.c | 10 +++-------
> 4 files changed, 8 insertions(+), 61 deletions(-)
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 7a9ff464608d..f9577e67e5ee 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -141,31 +141,7 @@ bool mhp_supports_memmap_on_memory(void);
>
> /*
> * Zone resizing functions
> - *
> - * Note: any attempt to resize a zone should has pgdat_resize_lock()
> - * zone_span_writelock() both held. This ensure the size of a zone
> - * can't be changed while pgdat_resize_lock() held.
> */
> -static inline unsigned zone_span_seqbegin(struct zone *zone)
> -{
> - return read_seqbegin(&zone->span_seqlock);
> -}
> -static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
> -{
> - return read_seqretry(&zone->span_seqlock, iv);
> -}
> -static inline void zone_span_writelock(struct zone *zone)
> -{
> - write_seqlock(&zone->span_seqlock);
> -}
> -static inline void zone_span_writeunlock(struct zone *zone)
> -{
> - write_sequnlock(&zone->span_seqlock);
> -}
> -static inline void zone_seqlock_init(struct zone *zone)
> -{
> - seqlock_init(&zone->span_seqlock);
> -}
> extern void adjust_present_page_count(struct page *page,
> struct memory_group *group,
> long nr_pages);
> @@ -251,17 +227,6 @@ static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat)
> ___page; \
> })
>
> -static inline unsigned zone_span_seqbegin(struct zone *zone)
> -{
> - return 0;
> -}
> -static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
> -{
> - return 0;
> -}
> -static inline void zone_span_writelock(struct zone *zone) {}
> -static inline void zone_span_writeunlock(struct zone *zone) {}
> -static inline void zone_seqlock_init(struct zone *zone) {}
>
> static inline int try_online_node(int nid)
> {
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 8f9c9590a42c..194ef7fed9d6 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -14,7 +14,6 @@
> #include <linux/threads.h>
> #include <linux/numa.h>
> #include <linux/init.h>
> -#include <linux/seqlock.h>
> #include <linux/nodemask.h>
> #include <linux/pageblock-flags.h>
> #include <linux/page-flags-layout.h>
> @@ -896,18 +895,11 @@ struct zone {
> *
> * Locking rules:
> *
> - * zone_start_pfn and spanned_pages are protected by span_seqlock.
> - * It is a seqlock because it has to be read outside of zone->lock,
> - * and it is done in the main allocator path. But, it is written
> - * quite infrequently.
> - *
> - * The span_seq lock is declared along with zone->lock because it is
> - * frequently read in proximity to zone->lock. It's good to
> - * give them a chance of being in the same cacheline.
> - *
> - * Write access to present_pages at runtime should be protected by
> - * mem_hotplug_begin/done(). Any reader who can't tolerant drift of
> - * present_pages should use get_online_mems() to get a stable value.
> + * Besides system initialization functions, memory-hotplug is the only
> + * user that can change zone's {spanned,present} pages at runtime, and
> + * it does so by holding the mem_hotplug_lock lock. Any readers who
> + * can't tolerate drift values should use {get,put}_online_mems to get
> + * a stable value.
> */
> atomic_long_t managed_pages;
> unsigned long spanned_pages;
> @@ -930,11 +922,6 @@ struct zone {
> unsigned long nr_isolate_pageblock;
> #endif
>
> -#ifdef CONFIG_MEMORY_HOTPLUG
> - /* see spanned/present_pages for more description */
> - seqlock_t span_seqlock;
> -#endif
> -
> int initialized;
>
> /* Write-intensive fields used from the page allocator */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index f72b852bd5b8..c725618aeb58 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1383,7 +1383,6 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
> zone->name = zone_names[idx];
> zone->zone_pgdat = NODE_DATA(nid);
> spin_lock_init(&zone->lock);
> - zone_seqlock_init(zone);
> zone_pcp_init(zone);
> }
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 2e22ce5675ca..5116a2b9ea6e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -426,16 +426,12 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
> static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
> {
> int ret;
> - unsigned seq;
> unsigned long pfn = page_to_pfn(page);
> unsigned long sp, start_pfn;
>
> - do {
> - seq = zone_span_seqbegin(zone);
> - start_pfn = zone->zone_start_pfn;
> - sp = zone->spanned_pages;
> - ret = !zone_spans_pfn(zone, pfn);
> - } while (zone_span_seqretry(zone, seq));
> + start_pfn = zone->zone_start_pfn;
> + sp = READ_ONCE(zone->spanned_pages);
> + ret = !zone_spans_pfn(zone, pfn);
The old seqlock guaranteed that we would have obtained consistent values
here: zone_start_pfn + spanned_pages together define a range. For example,
growing a zone at the beginning implies that both values must be changed.
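
To make that concrete, here is a minimal sketch (illustration only, not
code from this patch) of the interleaving the seqlock used to exclude:

    /* Writer side: hotplug growing the zone at the beginning. */
    static void grow_zone_at_start(struct zone *zone, unsigned long nr_pages)
    {
            /* Two independent stores; nothing groups them for readers. */
            WRITE_ONCE(zone->zone_start_pfn, zone->zone_start_pfn - nr_pages);
            WRITE_ONCE(zone->spanned_pages, zone->spanned_pages + nr_pages);
    }

A racing reader can load the new zone_start_pfn but the old
spanned_pages (or vice versa) and compute a span that was never valid
at any point in time. read_seqretry() used to detect the concurrent
update and retry; READ_ONCE() only prevents torn or refetched loads,
it does not make the two values consistent with each other.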
I do wonder if it might be better to instead have zone->zone_start_pfn
and zone->zone_end_pfn. That way, each can be changed individually,
without having to adjust both when growing/shrinking a zone at the
beginning. Then, using READ_ONCE() on both might actually make sense ...
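
Roughly, that alternative could look like this (a sketch only, assuming
a hypothetical zone->zone_end_pfn field; today zone_end_pfn() derives
the end from spanned_pages):

    static bool zone_spans_pfn_sketch(const struct zone *zone,
                                      unsigned long pfn)
    {
            unsigned long start = READ_ONCE(zone->zone_start_pfn);
            unsigned long end = READ_ONCE(zone->zone_end_pfn);

            /*
             * Growing at the beginning only moves 'start'; growing at
             * the end only moves 'end'. Each bound is individually
             * either the old or the new valid value, so the reader
             * gets a possibly stale but never nonsensical range.
             */
            return pfn >= start && pfn < end;
    }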
--
Cheers,
David / dhildenb