Message-ID: <mafs05xhbv9fd.fsf@kernel.org>
Date: Wed, 04 Jun 2025 17:00:22 +0200
From: Pratyush Yadav <pratyush@...nel.org>
To: Pasha Tatashin <pasha.tatashin@...een.com>
Cc: pratyush@...nel.org, jasonmiu@...gle.com, graf@...zon.com,
changyuanl@...gle.com, rppt@...nel.org, dmatlack@...gle.com,
rientjes@...gle.com, corbet@....net, rdunlap@...radead.org,
ilpo.jarvinen@...ux.intel.com, kanie@...ux.alibaba.com,
ojeda@...nel.org, aliceryhl@...gle.com, masahiroy@...nel.org,
akpm@...ux-foundation.org, tj@...nel.org, yoann.congal@...le.fr,
mmaurer@...gle.com, roman.gushchin@...ux.dev, chenridong@...wei.com,
axboe@...nel.dk, mark.rutland@....com, jannh@...gle.com,
vincent.guittot@...aro.org, hannes@...xchg.org,
dan.j.williams@...el.com, david@...hat.com, joel.granados@...nel.org,
rostedt@...dmis.org, anna.schumaker@...cle.com, song@...nel.org,
zhangguopeng@...inos.cn, linux@...ssschuh.net,
linux-kernel@...r.kernel.org, linux-doc@...r.kernel.org,
linux-mm@...ck.org, gregkh@...uxfoundation.org, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
x86@...nel.org, hpa@...or.com, rafael@...nel.org, dakr@...nel.org,
bartosz.golaszewski@...aro.org, cw00.choi@...sung.com,
myungjoo.ham@...sung.com, yesanishhere@...il.com,
Jonathan.Cameron@...wei.com, quic_zijuhu@...cinc.com,
aleksander.lobakin@...el.com, ira.weiny@...el.com,
andriy.shevchenko@...ux.intel.com, leon@...nel.org, lukas@...ner.de,
bhelgaas@...gle.com, wagi@...nel.org, djeffery@...hat.com,
stuart.w.hayes@...il.com
Subject: Re: [RFC v2 03/16] kho: add kho_unpreserve_folio/phys
On Thu, May 15 2025, Pasha Tatashin wrote:
> From: Changyuan Lyu <changyuanl@...gle.com>
>
> Allow users of KHO to cancel the previous preservation by adding the
> necessary interfaces to unpreserve folio.
>
> Signed-off-by: Changyuan Lyu <changyuanl@...gle.com>
> Co-developed-by: Pasha Tatashin <pasha.tatashin@...een.com>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
> ---
> include/linux/kexec_handover.h | 12 +++++
> kernel/kexec_handover.c | 84 ++++++++++++++++++++++++++++------
> 2 files changed, 83 insertions(+), 13 deletions(-)
>
[...]
> diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
> index 8ff561e36a87..eb305e7e6129 100644
> --- a/kernel/kexec_handover.c
> +++ b/kernel/kexec_handover.c
> @@ -101,26 +101,33 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
> return elm;
> }
>
> -static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
> - unsigned long end_pfn)
> +static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
> + unsigned int order)
> {
> struct kho_mem_phys_bits *bits;
> struct kho_mem_phys *physxa;
> + const unsigned long pfn_high = pfn >> order;
>
> - while (pfn < end_pfn) {
> - const unsigned int order =
> - min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
> - const unsigned long pfn_high = pfn >> order;
> + physxa = xa_load(&track->orders, order);
> + if (!physxa)
> + return;
>
> - physxa = xa_load(&track->orders, order);
> - if (!physxa)
> - continue;
> + bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
> + if (!bits)
> + return;
>
> - bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
> - if (!bits)
> - continue;
> + clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
> +}
>
> - clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
> +static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
> + unsigned long end_pfn)
> +{
> + unsigned int order;
> +
> + while (pfn < end_pfn) {
> + order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
This is fragile. If the preserve call spans, say, 4 PFNs, the range gets
preserved as an order-2 allocation, but if the PFNs are then unpreserved
one by one, __kho_unpreserve_order() will only look in the order-0
xarray, which ends up doing nothing and leaks those pages.
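
To make that concrete, here is a hypothetical caller sequence (not from
this patch; the 4-page region and the page-by-page teardown are just
assumptions for illustration):

  phys_addr_t phys = region_start;	/* hypothetical, order-2 aligned */
  unsigned int i;

  /* Preserved as one order-2 entry (4 naturally aligned pages). */
  kho_preserve_phys(phys, 4 * PAGE_SIZE);

  /*
   * Each single-page unpreserve computes
   * order = min(count_trailing_zeros(pfn), ilog2(1)) = 0, so
   * __kho_unpreserve_order() only consults the order-0 xarray and the
   * order-2 bit set above is never cleared.
   */
  for (i = 0; i < 4; i++)
	  kho_unpreserve_phys(phys + i * PAGE_SIZE, PAGE_SIZE);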
It should either look through all orders to find the PFN, or at least
require in the API that unpreserve is called with the same phys and size
combination as the original preserve call.
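
The first option could look roughly like this (untested sketch, the
helper name is made up; it reuses the structures from this patch):

  static void __kho_unpreserve_any_order(struct kho_mem_track *track,
					 unsigned long pfn)
  {
	  struct kho_mem_phys *physxa;
	  unsigned long order;

	  /* Walk every order present and clear the bit covering @pfn. */
	  xa_for_each(&track->orders, order, physxa) {
		  struct kho_mem_phys_bits *bits;
		  const unsigned long pfn_high = pfn >> order;

		  bits = xa_load(&physxa->phys_bits,
				 pfn_high / PRESERVE_BITS);
		  if (bits && test_and_clear_bit(pfn_high % PRESERVE_BITS,
						 bits->preserve))
			  return;
	  }
  }

This only works because preservations are naturally aligned to their
order, so pfn >> order lands on the same bit for every page inside the
block. It also silently widens a partial unpreserve to the whole block,
which is why requiring a matching phys/size pair in the API might be the
cleaner option.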
> +
> + __kho_unpreserve_order(track, pfn, order);
>
> pfn += 1 << order;
> }
> @@ -607,6 +614,29 @@ int kho_preserve_folio(struct folio *folio)
> }
> EXPORT_SYMBOL_GPL(kho_preserve_folio);
>
> +/**
> + * kho_unpreserve_folio - unpreserve a folio.
> + * @folio: folio to unpreserve.
> + *
> + * Instructs KHO to unpreserve a folio that was preserved by
> + * kho_preserve_folio() before.
> + *
> + * Return: 0 on success, error code on failure
> + */
> +int kho_unpreserve_folio(struct folio *folio)
> +{
> + const unsigned long pfn = folio_pfn(folio);
> + const unsigned int order = folio_order(folio);
> + struct kho_mem_track *track = &kho_out.ser.track;
> +
> + if (kho_out.finalized)
> + return -EBUSY;
> +
> + __kho_unpreserve_order(track, pfn, order);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
> +
> /**
> * kho_preserve_phys - preserve a physically contiguous range across kexec.
> * @phys: physical address of the range.
> @@ -652,6 +682,34 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
> }
> EXPORT_SYMBOL_GPL(kho_preserve_phys);
>
> +/**
> + * kho_unpreserve_phys - unpreserve a physically contiguous range across kexec.
> + * @phys: physical address of the range.
> + * @size: size of the range.
> + *
> + * Instructs KHO to unpreserve the memory range from @phys to @phys + @size
> + * across kexec.
> + *
> + * Return: 0 on success, error code on failure
> + */
> +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> +{
> + struct kho_mem_track *track = &kho_out.ser.track;
> + unsigned long pfn = PHYS_PFN(phys);
> + unsigned long end_pfn = PHYS_PFN(phys + size);
> +
> + if (kho_out.finalized)
> + return -EBUSY;
> +
> + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
> + return -EINVAL;
> +
> + __kho_unpreserve(track, pfn, end_pfn);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kho_unpreserve_phys);
> +
> int __kho_abort(void)
> {
> int err;
--
Regards,
Pratyush Yadav