Message-ID: <mafs0qzu05wz1.fsf@kernel.org>
Date: Fri, 14 Nov 2025 18:45:54 +0100
From: Pratyush Yadav <pratyush@...nel.org>
To: Pasha Tatashin <pasha.tatashin@...een.com>
Cc: akpm@...ux-foundation.org, bhe@...hat.com, rppt@...nel.org,
jasonmiu@...gle.com, arnd@...db.de, coxu@...hat.com,
dave@...ilevsky.ca, ebiggers@...gle.com, graf@...zon.com,
kees@...nel.org, linux-kernel@...r.kernel.org,
kexec@...ts.infradead.org, linux-mm@...ck.org
Subject: Re: [PATCH v1 13/13] kho: Introduce high-level memory allocation API
On Fri, Nov 14 2025, Pasha Tatashin wrote:
> Currently, clients of KHO must manually allocate memory (e.g., via
> alloc_pages), calculate the page order, and explicitly call
> kho_preserve_folio(). Similarly, cleanup requires separate calls to
> unpreserve and free the memory.
>
> Introduce a high-level API to streamline this common pattern:
>
> - kho_alloc_preserve(size): Allocates physically contiguous, zeroed
> memory and immediately marks it for preservation.
> - kho_free_unpreserve(ptr, size): Unpreserves and frees the memory
> in the current kernel.
> - kho_free_restore(ptr, size): Restores the struct page state of
> preserved memory in the new kernel and immediately frees it to the
> page allocator.
Nit: kho_unpreserve_free() and kho_restore_free() make more sense to me
since that is the order of operations. Having them the other way round
is kind of confusing.
Also, why do the free functions need size? They can get the order from
folio_order(). This would save users of the API from having to store the
size somewhere and make things simpler.
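FWIW, with the size gone (and using the names from the nit above), the
prototypes would reduce to something like this untested sketch:

    void *kho_alloc_preserve(size_t size);
    void kho_unpreserve_free(void *mem);
    void kho_restore_free(void *mem);

and a caller then only needs to keep the pointer around:

    foo = kho_alloc_preserve(sizeof(*foo));
    if (IS_ERR(foo))
            return PTR_ERR(foo);
    ...
    kho_unpreserve_free(foo);    /* error path / teardown */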
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
> ---
> include/linux/kexec_handover.h | 22 +++++--
> kernel/liveupdate/kexec_handover.c | 101 +++++++++++++++++++++++++++++
> 2 files changed, 116 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
> index 80ece4232617..76c496e01877 100644
> --- a/include/linux/kexec_handover.h
> +++ b/include/linux/kexec_handover.h
> @@ -2,8 +2,9 @@
> #ifndef LINUX_KEXEC_HANDOVER_H
> #define LINUX_KEXEC_HANDOVER_H
>
> -#include <linux/types.h>
> +#include <linux/err.h>
> #include <linux/errno.h>
> +#include <linux/types.h>
>
> struct kho_scratch {
> phys_addr_t addr;
> @@ -48,6 +49,9 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages);
> int kho_unpreserve_pages(struct page *page, unsigned int nr_pages);
> int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
> int kho_unpreserve_vmalloc(struct kho_vmalloc *preservation);
> +void *kho_alloc_preserve(size_t size);
> +void kho_free_unpreserve(void *mem, size_t size);
> +void kho_free_restore(void *mem, size_t size);
> struct folio *kho_restore_folio(phys_addr_t phys);
> struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
> void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
> @@ -101,6 +105,14 @@ static inline int kho_unpreserve_vmalloc(struct kho_vmalloc *preservation)
> return -EOPNOTSUPP;
> }
>
> +static inline void *kho_alloc_preserve(size_t size)
> +{
> + return ERR_PTR(-EOPNOTSUPP);
> +}
> +
> +static inline void kho_free_unpreserve(void *mem, size_t size) { }
> +static inline void kho_free_restore(void *mem, size_t size) { }
> +
> static inline struct folio *kho_restore_folio(phys_addr_t phys)
> {
> return NULL;
> @@ -122,18 +134,14 @@ static inline int kho_add_subtree(const char *name, void *fdt)
> return -EOPNOTSUPP;
> }
>
> -static inline void kho_remove_subtree(void *fdt)
> -{
> -}
> +static inline void kho_remove_subtree(void *fdt) { }
>
> static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
> {
> return -EOPNOTSUPP;
> }
>
> -static inline void kho_memory_init(void)
> -{
> -}
> +static inline void kho_memory_init(void) { }
>
> static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> phys_addr_t scratch_phys, u64 scratch_len)
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index a905bccf5f65..9f05849fd68e 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -4,6 +4,7 @@
> * Copyright (C) 2023 Alexander Graf <graf@...zon.com>
> * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@...nel.org>
> * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@...gle.com>
> + * Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@...een.com>
> */
>
> #define pr_fmt(fmt) "KHO: " fmt
> @@ -1151,6 +1152,106 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
> }
> EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
>
> +/**
> + * kho_alloc_preserve - Allocate, zero, and preserve memory.
> + * @size: The number of bytes to allocate.
> + *
> + * Allocates a physically contiguous block of zeroed pages that is large
> + * enough to hold @size bytes. The allocated memory is then registered with
> + * KHO for preservation across a kexec.
> + *
> + * Note: The actual allocated size will be rounded up to the nearest
> + * power-of-two page boundary.
> + *
> + * Return: A virtual pointer to the allocated and preserved memory on success,
> + * or an ERR_PTR()-encoded error on failure.
> + */
> +void *kho_alloc_preserve(size_t size)
> +{
> + struct folio *folio;
> + int order, ret;
> +
> + if (!size)
> + return ERR_PTR(-EINVAL);
> +
> + order = get_order(size);
> + if (order > MAX_PAGE_ORDER)
> + return ERR_PTR(-E2BIG);
> +
> + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, order);
> + if (!folio)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = kho_preserve_folio(folio);
> + if (ret) {
> + folio_put(folio);
> + return ERR_PTR(ret);
> + }
> +
> + return folio_address(folio);
> +}
> +EXPORT_SYMBOL_GPL(kho_alloc_preserve);
> +
> +/**
> + * kho_free_unpreserve - Unpreserve and free memory.
> + * @mem: Pointer to the memory allocated by kho_alloc_preserve().
> + * @size: The original size requested during allocation. This is used to
> + * recalculate the correct order for freeing the pages.
> + *
> + * Unregisters the memory from KHO preservation and frees the underlying
> + * pages back to the system. This function should be called to clean up
> + * memory allocated with kho_alloc_preserve().
> + */
> +void kho_free_unpreserve(void *mem, size_t size)
> +{
> + struct folio *folio;
> + unsigned int order;
> +
> + if (!mem || !size)
> + return;
> +
> + order = get_order(size);
> + if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
> + return;
> +
> + folio = virt_to_folio(mem);
> + WARN_ON_ONCE(kho_unpreserve_folio(folio));
This is what I meant in my reply to the previous patch.
kho_unpreserve_folio() can be void now, so the WARN_ON_ONCE() is not
needed.
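With that, the tail of this function reduces to just:

    folio = virt_to_folio(mem);
    kho_unpreserve_folio(folio);
    folio_put(folio);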
> + folio_put(folio);
> +}
> +EXPORT_SYMBOL_GPL(kho_free_unpreserve);
> +
> +/**
> + * kho_free_restore - Restore and free memory after kexec.
> + * @mem: Pointer to the memory (in the new kernel's address space)
> + * that was allocated by the old kernel.
> + * @size: The original size requested during allocation. This is used to
> + * recalculate the correct order for freeing the pages.
> + *
> + * This function is intended to be called in the new kernel (post-kexec)
> + * to take ownership of and free a memory region that was preserved by the
> + * old kernel using kho_alloc_preserve().
> + *
> + * It first restores the pages from KHO (using their physical address)
> + * and then frees the pages back to the new kernel's page allocator.
> + */
> +void kho_free_restore(void *mem, size_t size)
On the restore side, callers already work with the physical address
directly, as kho_restore_folio() and kho_restore_pages() do, for
example. This function should follow suit for uniformity. That would
also save the callers a __va() call and this function the __pa() call.
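i.e. the prototype would become something like:

    void kho_restore_free(phys_addr_t phys);

and callers pass the preserved physical address straight through instead
of converting it back and forth.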
> +{
> + struct folio *folio;
> + unsigned int order;
> +
> + if (!mem || !size)
> + return;
> +
> + order = get_order(size);
> + if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
> + return;
> +
> + folio = kho_restore_folio(__pa(mem));
> + if (!WARN_ON(!folio))
kho_restore_folio() already WARNs on failure. So the WARN_ON() here can
be skipped I think.
> + free_pages((unsigned long)mem, order);
folio_put() here makes more sense since we just restored a folio.
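Putting these together, the whole thing could look something like this
untested sketch:

    void kho_restore_free(phys_addr_t phys)
    {
            struct folio *folio;

            if (!phys)
                    return;

            /* kho_restore_folio() already WARNs and returns NULL on failure */
            folio = kho_restore_folio(phys);
            if (folio)
                    folio_put(folio);
    }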
> +}
> +EXPORT_SYMBOL_GPL(kho_free_restore);
> +
> int kho_finalize(void)
> {
> int ret;
--
Regards,
Pratyush Yadav