Message-ID: <mafs0ikg7fbez.fsf@kernel.org>
Date: Wed, 22 Oct 2025 13:01:08 +0200
From: Pratyush Yadav <pratyush@...nel.org>
To: Pasha Tatashin <pasha.tatashin@...een.com>
Cc: akpm@...ux-foundation.org,  brauner@...nel.org,  corbet@....net,
  graf@...zon.com,  jgg@...pe.ca,  linux-kernel@...r.kernel.org,
  linux-kselftest@...r.kernel.org,  linux-mm@...ck.org,
  masahiroy@...nel.org,  ojeda@...nel.org,  pratyush@...nel.org,
  rdunlap@...radead.org,  rppt@...nel.org,  tj@...nel.org
Subject: Re: [PATCHv7 3/7] kho: drop notifiers

Hi Pasha,

On Tue, Oct 21 2025, Pasha Tatashin wrote:

> From: "Mike Rapoport (Microsoft)" <rppt@...nel.org>
>
> The KHO framework uses a notifier chain as the mechanism for clients to
> participate in the finalization process. While this works for a single,
> central state machine, it is too restrictive for kernel-internal
> components like pstore/reserve_mem or IMA. These components need a
> simpler, direct way to register their state for preservation (e.g.,
> during their initcall) without being part of a complex,
> shutdown-time notifier sequence. The notifier model forces all
> participants into a single finalization flow and makes direct
> preservation from an arbitrary context difficult.
> This patch refactors the client participation model by removing the
> notifier chain and introducing a direct API for managing FDT subtrees.
>
> The core kho_finalize() and kho_abort() state machine remains, but
> clients now register their data with KHO beforehand.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@...nel.org>
> Co-developed-by: Pasha Tatashin <pasha.tatashin@...een.com>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
> ---
>  include/linux/kexec_handover.h   |  28 +-----
>  kernel/kexec_handover.c          | 164 +++++++++++++++++--------------
>  kernel/kexec_handover_debugfs.c  |  17 ++--
>  kernel/kexec_handover_internal.h |   5 +-
>  lib/test_kho.c                   |  30 +-----
>  mm/memblock.c                    |  62 +++---------
>  6 files changed, 127 insertions(+), 179 deletions(-)
>
> diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
> index 04d0108db98e..2faf290803ce 100644
> --- a/include/linux/kexec_handover.h
> +++ b/include/linux/kexec_handover.h
> @@ -10,14 +10,7 @@ struct kho_scratch {
>  	phys_addr_t size;
>  };
>  
> -/* KHO Notifier index */
> -enum kho_event {
> -	KEXEC_KHO_FINALIZE = 0,
> -	KEXEC_KHO_ABORT = 1,
> -};
> -
>  struct folio;
> -struct notifier_block;
>  struct page;
>  
>  #define DECLARE_KHOSER_PTR(name, type) \
> @@ -37,8 +30,6 @@ struct page;
>  		(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
>  	})
>  
> -struct kho_serialization;
> -
>  struct kho_vmalloc_chunk;
>  struct kho_vmalloc {
>  	DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *);
> @@ -57,12 +48,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation);
>  struct folio *kho_restore_folio(phys_addr_t phys);
>  struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages);
>  void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
> -int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
> +int kho_add_subtree(const char *name, void *fdt);
> +void kho_remove_subtree(void *fdt);
>  int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
>  
> -int register_kho_notifier(struct notifier_block *nb);
> -int unregister_kho_notifier(struct notifier_block *nb);
> -
>  void kho_memory_init(void);
>  
>  void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
> @@ -114,23 +103,16 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
>  	return NULL;
>  }
>  
> -static inline int kho_add_subtree(struct kho_serialization *ser,
> -				  const char *name, void *fdt)
> +static inline int kho_add_subtree(const char *name, void *fdt)
>  {
>  	return -EOPNOTSUPP;
>  }
>  
> -static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
> +static inline void kho_remove_subtree(void *fdt)
>  {
> -	return -EOPNOTSUPP;
>  }
>  
> -static inline int register_kho_notifier(struct notifier_block *nb)
> -{
> -	return -EOPNOTSUPP;
> -}
> -
> -static inline int unregister_kho_notifier(struct notifier_block *nb)
> +static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
>  {
>  	return -EOPNOTSUPP;
>  }
> diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
> index da071277d85e..0a4234269fe5 100644
> --- a/kernel/kexec_handover.c
> +++ b/kernel/kexec_handover.c
> @@ -16,7 +16,6 @@
>  #include <linux/libfdt.h>
>  #include <linux/list.h>
>  #include <linux/memblock.h>
> -#include <linux/notifier.h>
>  #include <linux/page-isolation.h>
>  #include <linux/vmalloc.h>
>  
> @@ -103,29 +102,34 @@ struct kho_mem_track {
>  
>  struct khoser_mem_chunk;
>  
> -struct kho_serialization {
> -	struct page *fdt;
> -	struct kho_mem_track track;
> -	/* First chunk of serialized preserved memory map */
> -	struct khoser_mem_chunk *preserved_mem_map;
> +struct kho_sub_fdt {
> +	struct list_head l;
> +	const char *name;
> +	void *fdt;
>  };
>  
>  struct kho_out {
> -	struct blocking_notifier_head chain_head;
> -	struct mutex lock; /* protects KHO FDT finalization */
> -	struct kho_serialization ser;
> +	void *fdt;
>  	bool finalized;
> +	struct mutex lock; /* protects KHO FDT finalization */
> +
> +	struct list_head sub_fdts;
> +	struct mutex fdts_lock;
> +
> +	struct kho_mem_track track;
> +	/* First chunk of serialized preserved memory map */
> +	struct khoser_mem_chunk *preserved_mem_map;
> +
>  	struct kho_debugfs dbg;
>  };
>  
>  static struct kho_out kho_out = {
> -	.chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
>  	.lock = __MUTEX_INITIALIZER(kho_out.lock),
> -	.ser = {
> -		.track = {
> -			.orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
> -		},
> +	.track = {
> +		.orders = XARRAY_INIT(kho_out.track.orders, 0),
>  	},
> +	.sub_fdts = LIST_HEAD_INIT(kho_out.sub_fdts),
> +	.fdts_lock = __MUTEX_INITIALIZER(kho_out.fdts_lock),
>  	.finalized = false,
>  };
>  
> @@ -369,7 +373,7 @@ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
>  	}
>  }
>  
> -static int kho_mem_serialize(struct kho_serialization *ser)
> +static int kho_mem_serialize(struct kho_out *kho_out)
>  {
>  	struct khoser_mem_chunk *first_chunk = NULL;
>  	struct khoser_mem_chunk *chunk = NULL;
> @@ -377,7 +381,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
>  	unsigned long order;
>  	int err = -ENOMEM;
>  
> -	xa_for_each(&ser->track.orders, order, physxa) {
> +	xa_for_each(&kho_out->track.orders, order, physxa) {
>  		struct kho_mem_phys_bits *bits;
>  		unsigned long phys;
>  
> @@ -409,7 +413,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
>  		}
>  	}
>  
> -	ser->preserved_mem_map = first_chunk;
> +	kho_out->preserved_mem_map = first_chunk;
>  
>  	return 0;
>  
> @@ -670,7 +674,6 @@ static void __init kho_reserve_scratch(void)
>  
>  /**
>   * kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
> - * @ser: serialization control object passed by KHO notifiers.
>   * @name: name of the sub tree.
>   * @fdt: the sub tree blob.
>   *
> @@ -684,34 +687,45 @@ static void __init kho_reserve_scratch(void)
>   *
>   * Return: 0 on success, error code on failure
>   */
> -int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
> +int kho_add_subtree(const char *name, void *fdt)
>  {
> -	int err = 0;
> -	u64 phys = (u64)virt_to_phys(fdt);
> -	void *root = page_to_virt(ser->fdt);
> +	struct kho_sub_fdt *sub_fdt;
> +	int err;
>  
> -	err |= fdt_begin_node(root, name);
> -	err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
> -	err |= fdt_end_node(root);
> +	sub_fdt = kmalloc(sizeof(*sub_fdt), GFP_KERNEL);
> +	if (!sub_fdt)
> +		return -ENOMEM;
>  
> -	if (err)
> -		return err;
> +	INIT_LIST_HEAD(&sub_fdt->l);
> +	sub_fdt->name = name;
> +	sub_fdt->fdt = fdt;
>  
> -	return kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
> +	mutex_lock(&kho_out.fdts_lock);
> +	list_add_tail(&sub_fdt->l, &kho_out.sub_fdts);
> +	err = kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);

I think you should remove sub_fdt from the list and kfree() it on error
here. Otherwise we signal an error to the caller and they might free
sub_fdt->fdt, which will later result in a use-after-free at
__kho_finalize().
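
Something along these lines, perhaps (untested):

	mutex_lock(&kho_out.fdts_lock);
	list_add_tail(&sub_fdt->l, &kho_out.sub_fdts);
	err = kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
	if (err) {
		/* undo the registration so the caller can safely free fdt */
		list_del(&sub_fdt->l);
		kfree(sub_fdt);
	}
	mutex_unlock(&kho_out.fdts_lock);

	return err;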

> +	mutex_unlock(&kho_out.fdts_lock);
> +
> +	return err;
>  }
>  EXPORT_SYMBOL_GPL(kho_add_subtree);
>  
> -int register_kho_notifier(struct notifier_block *nb)
> +void kho_remove_subtree(void *fdt)
>  {
> -	return blocking_notifier_chain_register(&kho_out.chain_head, nb);
> -}
> -EXPORT_SYMBOL_GPL(register_kho_notifier);
> +	struct kho_sub_fdt *sub_fdt;
> +
> +	mutex_lock(&kho_out.fdts_lock);
> +	list_for_each_entry(sub_fdt, &kho_out.sub_fdts, l) {

list_for_each_entry_safe() here since we delete.
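
Roughly (untested):

	struct kho_sub_fdt *sub_fdt, *tmp;

	mutex_lock(&kho_out.fdts_lock);
	list_for_each_entry_safe(sub_fdt, tmp, &kho_out.sub_fdts, l) {
		if (sub_fdt->fdt == fdt) {
			list_del(&sub_fdt->l);
			kfree(sub_fdt);
			kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
			break;
		}
	}
	mutex_unlock(&kho_out.fdts_lock);

The _safe variant caches the next entry, so freeing sub_fdt inside the
loop cannot poison the iteration.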

> +		if (sub_fdt->fdt == fdt) {
> +			list_del(&sub_fdt->l);
> +			kfree(sub_fdt);
> +			kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&kho_out.fdts_lock);
>  
> -int unregister_kho_notifier(struct notifier_block *nb)
> -{
> -	return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
>  }
> -EXPORT_SYMBOL_GPL(unregister_kho_notifier);
> +EXPORT_SYMBOL_GPL(kho_remove_subtree);
>  
>  /**
>   * kho_preserve_folio - preserve a folio across kexec.
> @@ -726,7 +740,7 @@ int kho_preserve_folio(struct folio *folio)
>  {
>  	const unsigned long pfn = folio_pfn(folio);
>  	const unsigned int order = folio_order(folio);
> -	struct kho_mem_track *track = &kho_out.ser.track;
> +	struct kho_mem_track *track = &kho_out.track;
>  
>  	if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
>  		return -EINVAL;
> @@ -747,7 +761,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
>   */
>  int kho_preserve_pages(struct page *page, unsigned int nr_pages)
>  {
> -	struct kho_mem_track *track = &kho_out.ser.track;
> +	struct kho_mem_track *track = &kho_out.track;
>  	const unsigned long start_pfn = page_to_pfn(page);
>  	const unsigned long end_pfn = start_pfn + nr_pages;
>  	unsigned long pfn = start_pfn;
> @@ -848,7 +862,7 @@ static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur
>  
>  static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk)
>  {
> -	struct kho_mem_track *track = &kho_out.ser.track;
> +	struct kho_mem_track *track = &kho_out.track;
>  	unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
>  
>  	__kho_unpreserve(track, pfn, pfn + 1);
> @@ -1030,11 +1044,11 @@ EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
>  
>  static int __kho_abort(void)
>  {
> -	int err;
> +	int err = 0;
>  	unsigned long order;
>  	struct kho_mem_phys *physxa;
>  
> -	xa_for_each(&kho_out.ser.track.orders, order, physxa) {
> +	xa_for_each(&kho_out.track.orders, order, physxa) {
>  		struct kho_mem_phys_bits *bits;
>  		unsigned long phys;
>  
> @@ -1044,17 +1058,13 @@ static int __kho_abort(void)
>  		xa_destroy(&physxa->phys_bits);
>  		kfree(physxa);
>  	}
> -	xa_destroy(&kho_out.ser.track.orders);
> +	xa_destroy(&kho_out.track.orders);
>  
> -	if (kho_out.ser.preserved_mem_map) {
> -		kho_mem_ser_free(kho_out.ser.preserved_mem_map);
> -		kho_out.ser.preserved_mem_map = NULL;
> +	if (kho_out.preserved_mem_map) {
> +		kho_mem_ser_free(kho_out.preserved_mem_map);
> +		kho_out.preserved_mem_map = NULL;
>  	}
>  
> -	err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
> -					   NULL);
> -	err = notifier_to_errno(err);
> -
>  	if (err)
>  		pr_err("Failed to abort KHO finalization: %d\n", err);
>  
> @@ -1077,7 +1087,8 @@ int kho_abort(void)
>  		return ret;
>  
>  	kho_out.finalized = false;
> -	kho_debugfs_cleanup(&kho_out.dbg);
> +
> +	kho_debugfs_fdt_remove(&kho_out.dbg, kho_out.fdt);
>  
>  	return 0;
>  }
> @@ -1086,41 +1097,46 @@ static int __kho_finalize(void)
>  {
>  	int err = 0;
>  	u64 *preserved_mem_map;
> -	void *fdt = page_to_virt(kho_out.ser.fdt);
> +	void *root = kho_out.fdt;
> +	struct kho_sub_fdt *fdt;
>  
> -	err |= fdt_create(fdt, PAGE_SIZE);
> -	err |= fdt_finish_reservemap(fdt);
> -	err |= fdt_begin_node(fdt, "");
> -	err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
> +	err |= fdt_create(root, PAGE_SIZE);
> +	err |= fdt_finish_reservemap(root);
> +	err |= fdt_begin_node(root, "");
> +	err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
>  	/**
>  	 * Reserve the preserved-memory-map property in the root FDT, so
>  	 * that all property definitions will precede subnodes created by
>  	 * KHO callers.
>  	 */
> -	err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
> +	err |= fdt_property_placeholder(root, PROP_PRESERVED_MEMORY_MAP,
>  					sizeof(*preserved_mem_map),
>  					(void **)&preserved_mem_map);
>  	if (err)
>  		goto abort;
>  
> -	err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
> +	err = kho_preserve_folio(virt_to_folio(kho_out.fdt));
>  	if (err)
>  		goto abort;
>  
> -	err = blocking_notifier_call_chain(&kho_out.chain_head,
> -					   KEXEC_KHO_FINALIZE, &kho_out.ser);
> -	err = notifier_to_errno(err);
> +	err = kho_mem_serialize(&kho_out);
>  	if (err)
>  		goto abort;
>  
> -	err = kho_mem_serialize(&kho_out.ser);
> -	if (err)
> -		goto abort;
> +	*preserved_mem_map = (u64)virt_to_phys(kho_out.preserved_mem_map);
> +
> +	mutex_lock(&kho_out.fdts_lock);
> +	list_for_each_entry(fdt, &kho_out.sub_fdts, l) {
> +		phys_addr_t phys = virt_to_phys(fdt->fdt);
>  
> -	*preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
> +		err |= fdt_begin_node(root, fdt->name);
> +		err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
> +		err |= fdt_end_node(root);
> +	}
> +	mutex_unlock(&kho_out.fdts_lock);
>  
> -	err |= fdt_end_node(fdt);
> -	err |= fdt_finish(fdt);
> +	err |= fdt_end_node(root);
> +	err |= fdt_finish(root);
>  
>  abort:
>  	if (err) {
> @@ -1149,7 +1165,7 @@ int kho_finalize(void)
>  	kho_out.finalized = true;
>  
>  	return kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
> -				   page_to_virt(kho_out.ser.fdt), true);
> +				  kho_out.fdt, true);
>  }
>  
>  bool kho_finalized(void)
> @@ -1232,15 +1248,17 @@ static __init int kho_init(void)
>  {
>  	int err = 0;
>  	const void *fdt = kho_get_fdt();
> +	struct page *fdt_page;
>  
>  	if (!kho_enable)
>  		return 0;
>  
> -	kho_out.ser.fdt = alloc_page(GFP_KERNEL);
> -	if (!kho_out.ser.fdt) {
> +	fdt_page = alloc_page(GFP_KERNEL);
> +	if (!fdt_page) {
>  		err = -ENOMEM;
>  		goto err_free_scratch;
>  	}
> +	kho_out.fdt = page_to_virt(fdt_page);
>  
>  	err = kho_debugfs_init();
>  	if (err)
> @@ -1268,8 +1286,8 @@ static __init int kho_init(void)
>  	return 0;
>  
>  err_free_fdt:
> -	put_page(kho_out.ser.fdt);
> -	kho_out.ser.fdt = NULL;
> +	put_page(fdt_page);
> +	kho_out.fdt = NULL;
>  err_free_scratch:
>  	for (int i = 0; i < kho_scratch_cnt; i++) {
>  		void *start = __va(kho_scratch[i].addr);
> @@ -1280,7 +1298,7 @@ static __init int kho_init(void)
>  	kho_enable = false;
>  	return err;
>  }
> -late_initcall(kho_init);
> +fs_initcall(kho_init);

Is this change related to this patch? Also, why fs_initcall?

>  
>  static void __init kho_release_scratch(void)
>  {
> @@ -1416,7 +1434,7 @@ int kho_fill_kimage(struct kimage *image)
>  	if (!kho_out.finalized)
>  		return 0;
>  
> -	image->kho.fdt = page_to_phys(kho_out.ser.fdt);
> +	image->kho.fdt = virt_to_phys(kho_out.fdt);
>  
>  	scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
>  	scratch = (struct kexec_buf){
> diff --git a/kernel/kexec_handover_debugfs.c b/kernel/kexec_handover_debugfs.c
> index a91b279f1b23..46e9e6c0791f 100644
> --- a/kernel/kexec_handover_debugfs.c
> +++ b/kernel/kexec_handover_debugfs.c
> @@ -61,14 +61,17 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
>  	return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
>  }
>  
> -void kho_debugfs_cleanup(struct kho_debugfs *dbg)
> +void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
>  {
> -	struct fdt_debugfs *ff, *tmp;
> -
> -	list_for_each_entry_safe(ff, tmp, &dbg->fdt_list, list) {
> -		debugfs_remove(ff->file);
> -		list_del(&ff->list);
> -		kfree(ff);
> +	struct fdt_debugfs *ff;
> +
> +	list_for_each_entry(ff, &dbg->fdt_list, list) {

list_for_each_entry_safe() here too.

> +		if (ff->wrapper.data == fdt) {
> +			debugfs_remove(ff->file);
> +			list_del(&ff->list);
> +			kfree(ff);
> +			break;
> +		}
>  	}
>  }
>  
> diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
> index 28c0e971613d..17ae101dc6ae 100644
> --- a/kernel/kexec_handover_internal.h
> +++ b/kernel/kexec_handover_internal.h
> @@ -30,7 +30,7 @@ void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
>  int kho_out_debugfs_init(struct kho_debugfs *dbg);
>  int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
>  			const void *fdt, bool root);
> -void kho_debugfs_cleanup(struct kho_debugfs *dbg);
> +void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
>  #else
>  static inline int kho_debugfs_init(void) { return 0; }
>  static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
> @@ -38,7 +38,8 @@ static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
>  static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
>  static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
>  				      const void *fdt, bool root) { return 0; }
> -static inline void kho_debugfs_cleanup(struct kho_debugfs *dbg) {}
> +static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
> +					  void *fdt) { }
>  #endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
>  
>  #ifdef CONFIG_KEXEC_HANDOVER_DEBUG
> diff --git a/lib/test_kho.c b/lib/test_kho.c
> index 60cd899ea745..8d57049e8c8c 100644
> --- a/lib/test_kho.c
> +++ b/lib/test_kho.c
> @@ -39,33 +39,17 @@ struct kho_test_state {
>  
>  static struct kho_test_state kho_test_state;
>  
> -static int kho_test_notifier(struct notifier_block *self, unsigned long cmd,
> -			     void *v)
> +static int kho_test(void)
>  {
>  	struct kho_test_state *state = &kho_test_state;
> -	struct kho_serialization *ser = v;
>  	int err = 0;
>  
> -	switch (cmd) {
> -	case KEXEC_KHO_ABORT:
> -		return NOTIFY_DONE;
> -	case KEXEC_KHO_FINALIZE:
> -		/* Handled below */
> -		break;
> -	default:
> -		return NOTIFY_BAD;
> -	}
> -
>  	err |= kho_preserve_folio(state->fdt);
> -	err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt));
> +	err |= kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
>  
>  	return err ? NOTIFY_BAD : NOTIFY_DONE;
>  }
>  
> -static struct notifier_block kho_test_nb = {
> -	.notifier_call = kho_test_notifier,
> -};
> -
>  static int kho_test_save_data(struct kho_test_state *state, void *fdt)
>  {
>  	phys_addr_t *folios_info __free(kvfree) = NULL;
> @@ -102,6 +86,9 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt)
>  	if (!err)
>  		state->folios_info = no_free_ptr(folios_info);
>  
> +	if (!err)
> +		err = kho_test();

This name is very undescriptive. Also, this is not the right place to
add the subtree since the FDT isn't finished yet. I think it should be
done from kho_test_save() instead. The patch also never removes the
subtree at module exit, which can cause a use-after-free.

I sent you a patch earlier with my take on how it should be done. I
still think that is the way to go:
https://lore.kernel.org/all/mafs0347woui2.fsf@kernel.org/
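
Just to illustrate, roughly what I had in mind (untested, glossing over
the surrounding error handling): add the subtree from kho_test_save()
once the FDT is finished, and drop it again at module exit:

	static int kho_test_save(void)
	{
		...
		err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt));
		if (err)
			goto err_free_folios;

		return 0;
		...
	}

	static void __exit kho_test_exit(void)
	{
		kho_remove_subtree(folio_address(kho_test_state.fdt));
		kho_test_cleanup();
	}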

> +
>  	return err;
>  }
>  
> @@ -203,14 +190,8 @@ static int kho_test_save(void)
>  	if (err)
>  		goto err_free_folios;
>  
> -	err = register_kho_notifier(&kho_test_nb);
> -	if (err)
> -		goto err_free_fdt;
> -
>  	return 0;
>  
> -err_free_fdt:
> -	folio_put(state->fdt);
>  err_free_folios:
>  	kvfree(folios);
>  	return err;
> @@ -326,7 +307,6 @@ static void kho_test_cleanup(void)
>  
>  static void __exit kho_test_exit(void)
>  {
> -	unregister_kho_notifier(&kho_test_nb);
>  	kho_test_cleanup();
>  }
>  module_exit(kho_test_exit);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index e23e16618e9b..e3bef9b35d63 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
>  #define MEMBLOCK_KHO_FDT "memblock"
>  #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
>  #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
> -static struct page *kho_fdt;
> -
> -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
> -{
> -	int err = 0, i;
> -
> -	for (i = 0; i < reserved_mem_count; i++) {
> -		struct reserve_mem_table *map = &reserved_mem_table[i];
> -		struct page *page = phys_to_page(map->start);
> -		unsigned int nr_pages = map->size >> PAGE_SHIFT;
> -
> -		err |= kho_preserve_pages(page, nr_pages);
> -	}
> -
> -	err |= kho_preserve_folio(page_folio(kho_fdt));
> -	err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
> -
> -	return notifier_from_errno(err);
> -}
> -
> -static int reserve_mem_kho_notifier(struct notifier_block *self,
> -				    unsigned long cmd, void *v)
> -{
> -	switch (cmd) {
> -	case KEXEC_KHO_FINALIZE:
> -		return reserve_mem_kho_finalize((struct kho_serialization *)v);
> -	case KEXEC_KHO_ABORT:
> -		return NOTIFY_DONE;
> -	default:
> -		return NOTIFY_BAD;
> -	}
> -}
> -
> -static struct notifier_block reserve_mem_kho_nb = {
> -	.notifier_call = reserve_mem_kho_notifier,
> -};
>  
>  static int __init prepare_kho_fdt(void)
>  {
>  	int err = 0, i;
> +	struct page *fdt_page;
>  	void *fdt;
>  
> -	kho_fdt = alloc_page(GFP_KERNEL);
> -	if (!kho_fdt)
> +	fdt_page = alloc_page(GFP_KERNEL);
> +	if (!fdt_page)
>  		return -ENOMEM;
>  
> -	fdt = page_to_virt(kho_fdt);
> +	fdt = page_to_virt(fdt_page);
>  
>  	err |= fdt_create(fdt, PAGE_SIZE);
>  	err |= fdt_finish_reservemap(fdt);
> @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
>  	err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
>  	for (i = 0; i < reserved_mem_count; i++) {
>  		struct reserve_mem_table *map = &reserved_mem_table[i];
> +		struct page *page = phys_to_page(map->start);
> +		unsigned int nr_pages = map->size >> PAGE_SHIFT;
>  
> +		err |= kho_preserve_pages(page, nr_pages);
>  		err |= fdt_begin_node(fdt, map->name);
>  		err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
>  		err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
> @@ -2507,13 +2475,16 @@ static int __init prepare_kho_fdt(void)
>  		err |= fdt_end_node(fdt);
>  	}
>  	err |= fdt_end_node(fdt);
> -
>  	err |= fdt_finish(fdt);
>  
> +	err |= kho_preserve_folio(page_folio(fdt_page));
> +
> +	if (!err)
> +		err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
> +
>  	if (err) {
>  		pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
> -		put_page(kho_fdt);
> -		kho_fdt = NULL;
> +		put_page(fdt_page);
>  	}
>  
>  	return err;
> @@ -2529,13 +2500,6 @@ static int __init reserve_mem_init(void)
>  	err = prepare_kho_fdt();
>  	if (err)
>  		return err;
> -
> -	err = register_kho_notifier(&reserve_mem_kho_nb);
> -	if (err) {
> -		put_page(kho_fdt);
> -		kho_fdt = NULL;
> -	}
> -
>  	return err;
>  }
>  late_initcall(reserve_mem_init);

-- 
Regards,
Pratyush Yadav
