[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aP5LB5NQ4lhhAA6Y@kernel.org>
Date: Sun, 26 Oct 2025 18:23:35 +0200
From: Mike Rapoport <rppt@...nel.org>
To: Pasha Tatashin <pasha.tatashin@...een.com>
Cc: akpm@...ux-foundation.org, brauner@...nel.org, corbet@....net,
graf@...zon.com, jgg@...pe.ca, linux-kernel@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-mm@...ck.org,
masahiroy@...nel.org, ojeda@...nel.org, pratyush@...nel.org,
rdunlap@...radead.org, tj@...nel.org
Subject: Re: [PATCH v8 3/8] kho: drop notifiers
On Fri, Oct 24, 2025 at 12:09:57PM -0400, Pasha Tatashin wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@...nel.org>
>
> The KHO framework uses a notifier chain as the mechanism for clients to
> participate in the finalization process. While this works for a single,
> central state machine, it is too restrictive for kernel-internal
> components like pstore/reserve_mem or IMA. These components need a
> simpler, direct way to register their state for preservation (e.g.,
> during their initcall) without being part of a complex,
> shutdown-time notifier sequence. The notifier model forces all
> participants into a single finalization flow and makes direct
> preservation from an arbitrary context difficult.
> This patch refactors the client participation model by removing the
> notifier chain and introducing a direct API for managing FDT subtrees.
>
> The core kho_finalize() and kho_abort() state machine remains, but
> clients now register their data with KHO beforehand.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@...nel.org>
> Co-developed-by: Pasha Tatashin <pasha.tatashin@...een.com>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
> ---
> include/linux/kexec_handover.h | 28 +-----
> kernel/kexec_handover.c | 166 +++++++++++++++++--------------
> kernel/kexec_handover_debugfs.c | 17 ++--
> kernel/kexec_handover_internal.h | 5 +-
> lib/test_kho.c | 33 +-----
> mm/memblock.c | 62 +++---------
> 6 files changed, 126 insertions(+), 185 deletions(-)
> diff --git a/lib/test_kho.c b/lib/test_kho.c
> index 60cd899ea745..1c6c4ce83666 100644
> --- a/lib/test_kho.c
> +++ b/lib/test_kho.c
> @@ -120,6 +93,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state)
>
> fdt = folio_address(state->fdt);
>
> + err |= kho_preserve_folio(state->fdt);
We should bail out here, no point creating an fdt if it won't be preserved.
> err |= fdt_create(fdt, fdt_size);
> err |= fdt_finish_reservemap(fdt);
>
> @@ -131,6 +105,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state)
>
> err |= fdt_finish(fdt);
>
> + err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
> if (err)
> folio_put(state->fdt);
>
> @@ -203,7 +178,7 @@ static int kho_test_save(void)
> if (err)
> goto err_free_folios;
>
> - err = register_kho_notifier(&kho_test_nb);
> + err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
This is the second time we add the same subtree, isn't it?
> if (err)
> goto err_free_fdt;
>
> @@ -326,7 +301,7 @@ static void kho_test_cleanup(void)
>
> static void __exit kho_test_exit(void)
> {
> - unregister_kho_notifier(&kho_test_nb);
> + kho_remove_subtree(folio_address(kho_test_state.fdt));
> kho_test_cleanup();
> }
> module_exit(kho_test_exit);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index e23e16618e9b..e3bef9b35d63 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> static int __init prepare_kho_fdt(void)
> {
> int err = 0, i;
> + struct page *fdt_page;
> void *fdt;
>
> - kho_fdt = alloc_page(GFP_KERNEL);
> - if (!kho_fdt)
> + fdt_page = alloc_page(GFP_KERNEL);
> + if (!fdt_page)
> return -ENOMEM;
>
> - fdt = page_to_virt(kho_fdt);
> + fdt = page_to_virt(fdt_page);
>
> err |= fdt_create(fdt, PAGE_SIZE);
> err |= fdt_finish_reservemap(fdt);
> @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
> err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
> for (i = 0; i < reserved_mem_count; i++) {
> struct reserve_mem_table *map = &reserved_mem_table[i];
> + struct page *page = phys_to_page(map->start);
> + unsigned int nr_pages = map->size >> PAGE_SHIFT;
>
> + err |= kho_preserve_pages(page, nr_pages);
> err |= fdt_begin_node(fdt, map->name);
> err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
> err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
> @@ -2507,13 +2475,16 @@ static int __init prepare_kho_fdt(void)
> err |= fdt_end_node(fdt);
> }
> err |= fdt_end_node(fdt);
> -
> err |= fdt_finish(fdt);
>
> + err |= kho_preserve_folio(page_folio(fdt_page));
When looking at the end result after patch 8 it becomes a total mess.
Let's move this right after the allocation and make it
err = kho_preserve_folio(page_folio(fdt_page);
if (err)
goto err_free_fdt;
> +
> + if (!err)
> + err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
and replace this pattern with usual kernel
if (err)
goto err_free_fdt;
err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
if (err)
goto err_free_fdt;
so that only fdt operations will be a part of
err |= fdt_<function>
sequence.
> if (err) {
> pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> - put_page(kho_fdt);
> - kho_fdt = NULL;
> + put_page(fdt_page);
> }
>
> return err;
> @@ -2529,13 +2500,6 @@ static int __init reserve_mem_init(void)
> err = prepare_kho_fdt();
> if (err)
> return err;
> -
> - err = register_kho_notifier(&reserve_mem_kho_nb);
> - if (err) {
> - put_page(kho_fdt);
> - kho_fdt = NULL;
> - }
> -
> return err;
> }
> late_initcall(reserve_mem_init);
> --
> 2.51.1.821.gb6fe4d2222-goog
>
>
--
Sincerely yours,
Mike.
Powered by blists - more mailing lists