lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aFkDBNpzcCNdqjm8@kernel.org>
Date: Mon, 23 Jun 2025 10:32:20 +0300
From: Mike Rapoport <rppt@...nel.org>
To: Pasha Tatashin <pasha.tatashin@...een.com>
Cc: Pratyush Yadav <pratyush@...nel.org>, Jason Gunthorpe <jgg@...pe.ca>,
	jasonmiu@...gle.com, graf@...zon.com, changyuanl@...gle.com,
	dmatlack@...gle.com, rientjes@...gle.com, corbet@....net,
	rdunlap@...radead.org, ilpo.jarvinen@...ux.intel.com,
	kanie@...ux.alibaba.com, ojeda@...nel.org, aliceryhl@...gle.com,
	masahiroy@...nel.org, akpm@...ux-foundation.org, tj@...nel.org,
	yoann.congal@...le.fr, mmaurer@...gle.com, roman.gushchin@...ux.dev,
	chenridong@...wei.com, axboe@...nel.dk, mark.rutland@....com,
	jannh@...gle.com, vincent.guittot@...aro.org, hannes@...xchg.org,
	dan.j.williams@...el.com, david@...hat.com,
	joel.granados@...nel.org, rostedt@...dmis.org,
	anna.schumaker@...cle.com, song@...nel.org, zhangguopeng@...inos.cn,
	linux@...ssschuh.net, linux-kernel@...r.kernel.org,
	linux-doc@...r.kernel.org, linux-mm@...ck.org,
	gregkh@...uxfoundation.org, tglx@...utronix.de, mingo@...hat.com,
	bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org,
	hpa@...or.com, rafael@...nel.org, dakr@...nel.org,
	bartosz.golaszewski@...aro.org, cw00.choi@...sung.com,
	myungjoo.ham@...sung.com, yesanishhere@...il.com,
	Jonathan.Cameron@...wei.com, quic_zijuhu@...cinc.com,
	aleksander.lobakin@...el.com, ira.weiny@...el.com,
	andriy.shevchenko@...ux.intel.com, leon@...nel.org, lukas@...ner.de,
	bhelgaas@...gle.com, wagi@...nel.org, djeffery@...hat.com,
	stuart.w.hayes@...il.com
Subject: Re: [RFC v2 05/16] luo: luo_core: integrate with KHO

On Wed, Jun 18, 2025 at 01:43:18PM -0400, Pasha Tatashin wrote:
> On Wed, Jun 18, 2025 at 1:00 PM Pasha Tatashin
>
> So currently, KHO provides the following two types of internal API:
> 
> Preserve memory and metadata
> =========================
> kho_preserve_folio() / kho_preserve_phys()
> kho_unpreserve_folio() / kho_unpreserve_phys()
> kho_restore_folio()
> 
> kho_add_subtree() kho_retrieve_subtree()
> 
> State machine
> ===========
> register_kho_notifier() / unregister_kho_notifier()
> 
> kho_finalize() / kho_abort()
> 
> We should remove the "State machine", and only keep the "Preserve
> Memory" API functions. At the time these functions are called, KHO
> should do the magic of making sure that the memory gets preserved
> across the reboot.
> 
> This way, reserve_mem_init() would call: kho_preserve_folio() and
> kho_add_subtree() during boot, and be done with it.

I agree that there's no need for notifiers.

I even have a half-cooked patch for this on top of "kho: allow to drive kho
from within kernel":

>From 02716e4731480bde997a9c1676b7246aa8e358de Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" <rppt@...nel.org>
Date: Sun, 22 Jun 2025 14:37:17 +0300
Subject: [PATCH] kho: drop notifiers

Signed-off-by: Mike Rapoport (Microsoft) <rppt@...nel.org>
---
 include/linux/kexec_handover.h   |  27 +-------
 kernel/kexec_handover.c          | 114 ++++++++++++++-----------------
 kernel/kexec_handover_debug.c    |   3 +-
 kernel/kexec_handover_internal.h |   3 +-
 mm/memblock.c                    |  56 +++------------
 5 files changed, 65 insertions(+), 138 deletions(-)

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index f98565def593..ac9cb6eae71f 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -10,14 +10,7 @@ struct kho_scratch {
 	phys_addr_t size;
 };
 
-/* KHO Notifier index */
-enum kho_event {
-	KEXEC_KHO_FINALIZE = 0,
-	KEXEC_KHO_ABORT = 1,
-};
-
 struct folio;
-struct notifier_block;
 
 #define DECLARE_KHOSER_PTR(name, type) \
 	union {                        \
@@ -36,20 +29,15 @@ struct notifier_block;
 		(typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
 	})
 
-struct kho_serialization;
-
 #ifdef CONFIG_KEXEC_HANDOVER
 bool kho_is_enabled(void);
 
 int kho_preserve_folio(struct folio *folio);
 int kho_preserve_phys(phys_addr_t phys, size_t size);
 struct folio *kho_restore_folio(phys_addr_t phys);
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_add_subtree(const char *name, void *fdt);
 int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
 
-int register_kho_notifier(struct notifier_block *nb);
-int unregister_kho_notifier(struct notifier_block *nb);
-
 void kho_memory_init(void);
 
 void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
@@ -79,8 +67,7 @@ static inline struct folio *kho_restore_folio(phys_addr_t phys)
 	return NULL;
 }
 
-static inline int kho_add_subtree(struct kho_serialization *ser,
-				  const char *name, void *fdt)
+static inline int kho_add_subtree(const char *name, void *fdt)
 {
 	return -EOPNOTSUPP;
 }
@@ -90,16 +77,6 @@ static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
 	return -EOPNOTSUPP;
 }
 
-static inline int register_kho_notifier(struct notifier_block *nb)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int unregister_kho_notifier(struct notifier_block *nb)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline void kho_memory_init(void)
 {
 }
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 176eaf2c31ab..b609eaf92550 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -15,7 +15,6 @@
 #include <linux/libfdt.h>
 #include <linux/list.h>
 #include <linux/memblock.h>
-#include <linux/notifier.h>
 #include <linux/page-isolation.h>
 
 #include <asm/early_ioremap.h>
@@ -552,7 +551,6 @@ static void __init kho_reserve_scratch(void)
 
 /**
  * kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
- * @ser: serialization control object passed by KHO notifiers.
  * @name: name of the sub tree.
  * @fdt: the sub tree blob.
  *
@@ -566,11 +564,12 @@ static void __init kho_reserve_scratch(void)
  *
  * Return: 0 on success, error code on failure
  */
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
+int kho_add_subtree(const char *name, void *fdt)
 {
+	struct kho_serialization *ser = &kho_out.ser;
 	int err = 0;
 	u64 phys = (u64)virt_to_phys(fdt);
-	void *root = page_to_virt(ser->fdt);
+	void *root = ser->fdt;
 
 	err |= fdt_begin_node(root, name);
 	err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
@@ -584,7 +583,6 @@ int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
 EXPORT_SYMBOL_GPL(kho_add_subtree);
 
 struct kho_out kho_out = {
-	.chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
 	.lock = __MUTEX_INITIALIZER(kho_out.lock),
 	.ser = {
 		.fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list),
@@ -595,18 +593,6 @@ struct kho_out kho_out = {
 	.finalized = false,
 };
 
-int register_kho_notifier(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&kho_out.chain_head, nb);
-}
-EXPORT_SYMBOL_GPL(register_kho_notifier);
-
-int unregister_kho_notifier(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_kho_notifier);
-
 /**
  * kho_preserve_folio - preserve a folio across kexec.
  * @folio: folio to preserve.
@@ -676,7 +662,6 @@ EXPORT_SYMBOL_GPL(kho_preserve_phys);
 
 int __kho_abort(void)
 {
-	int err;
 	unsigned long order;
 	struct kho_mem_phys *physxa;
 
@@ -697,44 +682,15 @@ int __kho_abort(void)
 		kho_out.ser.preserved_mem_map = NULL;
 	}
 
-	err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
-					   NULL);
-	err = notifier_to_errno(err);
-
-	if (err)
-		pr_err("Failed to abort KHO finalization: %d\n", err);
-
-	return err;
+	return 0;
 }
 
 int __kho_finalize(void)
 {
 	int err = 0;
-	u64 *preserved_mem_map;
-	void *fdt = page_to_virt(kho_out.ser.fdt);
-
-	err |= fdt_create(fdt, PAGE_SIZE);
-	err |= fdt_finish_reservemap(fdt);
-	err |= fdt_begin_node(fdt, "");
-	err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
-	/**
-	 * Reserve the preserved-memory-map property in the root FDT, so
-	 * that all property definitions will precede subnodes created by
-	 * KHO callers.
-	 */
-	err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
-					sizeof(*preserved_mem_map),
-					(void **)&preserved_mem_map);
-	if (err)
-		goto abort;
+	void *fdt = kho_out.ser.fdt;
 
-	err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
-	if (err)
-		goto abort;
-
-	err = blocking_notifier_call_chain(&kho_out.chain_head,
-					   KEXEC_KHO_FINALIZE, &kho_out.ser);
-	err = notifier_to_errno(err);
+	err = kho_preserve_folio(page_folio(virt_to_page(kho_out.ser.fdt)));
 	if (err)
 		goto abort;
 
@@ -742,7 +698,7 @@ int __kho_finalize(void)
 	if (err)
 		goto abort;
 
-	*preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
+	*kho_out.ser.fdt_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
 
 	err |= fdt_end_node(fdt);
 	err |= fdt_finish(fdt);
@@ -863,19 +819,13 @@ static __init int kho_init(void)
 	if (!kho_enable)
 		return 0;
 
-	kho_out.ser.fdt = alloc_page(GFP_KERNEL);
-	if (!kho_out.ser.fdt) {
-		err = -ENOMEM;
-		goto err_free_scratch;
-	}
-
 	err = kho_debugfs_init();
 	if (err)
-		goto err_free_fdt;
+		goto err_free_scratch;
 
 	err = kho_out_debugfs_init();
 	if (err)
-		goto err_free_fdt;
+		goto err_free_scratch;
 
 	if (fdt) {
 		kho_in_debugfs_init(fdt);
@@ -894,9 +844,6 @@ static __init int kho_init(void)
 
 	return 0;
 
-err_free_fdt:
-	put_page(kho_out.ser.fdt);
-	kho_out.ser.fdt = NULL;
 err_free_scratch:
 	for (int i = 0; i < kho_scratch_cnt; i++) {
 		void *start = __va(kho_scratch[i].addr);
@@ -933,10 +880,50 @@ static void __init kho_release_scratch(void)
 	}
 }
 
+static int __init kho_out_fdt_init(void)
+{
+	void *fdt;
+	int err = 0;
+
+	fdt = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!fdt)
+		return -ENOMEM;
+
+	err |= fdt_create(fdt, PAGE_SIZE);
+	err |= fdt_finish_reservemap(fdt);
+	err |= fdt_begin_node(fdt, "");
+	err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
+	/**
+	 * Reserve the preserved-memory-map property in the root FDT, so
+	 * that all property definitions will precede subnodes created by
+	 * KHO callers.
+	 */
+	err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
+					sizeof(*kho_out.ser.fdt_mem_map),
+					(void **)&kho_out.ser.fdt_mem_map);
+	if (err)
+		goto err_free_fdt;
+
+	kho_out.ser.fdt = fdt;
+	return 0;
+
+err_free_fdt:
+	memblock_free(fdt, PAGE_SIZE);
+	return err;
+}
+
 void __init kho_memory_init(void)
 {
 	struct folio *folio;
 
+	int err = kho_out_fdt_init();
+
+	if (err) {
+		pr_err("failed to allocate root FDT, disabling KHO\n");
+		kho_enable = false;
+		return;
+	}
+
 	if (kho_in.scratch_phys) {
 		kho_scratch = phys_to_virt(kho_in.scratch_phys);
 		kho_release_scratch();
@@ -1008,6 +995,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 	}
 
 	memblock_reserve(scratch_phys, scratch_len);
+	memblock_reserve(fdt_phys, PAGE_SIZE);
 
 	/*
 	 * Now that we have a viable region of scratch memory, let's tell
@@ -1043,7 +1031,7 @@ int kho_fill_kimage(struct kimage *image)
 	if (!kho_enable)
 		return 0;
 
-	image->kho.fdt = page_to_phys(kho_out.ser.fdt);
+	image->kho.fdt = virt_to_phys(kho_out.ser.fdt);
 
 	scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
 	scratch = (struct kexec_buf){
diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c
index a15c238ec98e..a34997a1adae 100644
--- a/kernel/kexec_handover_debug.c
+++ b/kernel/kexec_handover_debug.c
@@ -62,8 +62,7 @@ int kho_out_update_debugfs_fdt(void)
 
 	if (kho_out.finalized) {
 		err = __kho_debugfs_fdt_add(&kho_out.ser.fdt_list, kho_out.dir,
-					    "fdt",
-					    page_to_virt(kho_out.ser.fdt));
+					    "fdt", kho_out.ser.fdt);
 	} else {
 		list_for_each_entry_safe(ff, tmp, &kho_out.ser.fdt_list, list) {
 			debugfs_remove(ff->file);
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
index 0b534758d39d..bf78ecb06996 100644
--- a/kernel/kexec_handover_internal.h
+++ b/kernel/kexec_handover_internal.h
@@ -16,7 +16,8 @@ struct kho_mem_track {
 };
 
 struct kho_serialization {
-	struct page *fdt;
+	void *fdt;
+	u64 *fdt_mem_map;
 	struct list_head fdt_list;
 	struct kho_mem_track track;
 	/* First chunk of serialized preserved memory map */
diff --git a/mm/memblock.c b/mm/memblock.c
index 154f1d73b61f..6af0b51b1bb7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2501,51 +2501,18 @@ int reserve_mem_release_by_name(const char *name)
 #define MEMBLOCK_KHO_FDT "memblock"
 #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
 #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
-static struct page *kho_fdt;
-
-static int reserve_mem_kho_finalize(struct kho_serialization *ser)
-{
-	int err = 0, i;
-
-	for (i = 0; i < reserved_mem_count; i++) {
-		struct reserve_mem_table *map = &reserved_mem_table[i];
-
-		err |= kho_preserve_phys(map->start, map->size);
-	}
-
-	err |= kho_preserve_folio(page_folio(kho_fdt));
-	err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
-
-	return notifier_from_errno(err);
-}
-
-static int reserve_mem_kho_notifier(struct notifier_block *self,
-				    unsigned long cmd, void *v)
-{
-	switch (cmd) {
-	case KEXEC_KHO_FINALIZE:
-		return reserve_mem_kho_finalize((struct kho_serialization *)v);
-	case KEXEC_KHO_ABORT:
-		return NOTIFY_DONE;
-	default:
-		return NOTIFY_BAD;
-	}
-}
-
-static struct notifier_block reserve_mem_kho_nb = {
-	.notifier_call = reserve_mem_kho_notifier,
-};
 
 static int __init prepare_kho_fdt(void)
 {
 	int err = 0, i;
+	struct page *fdt_page;
 	void *fdt;
 
-	kho_fdt = alloc_page(GFP_KERNEL);
-	if (!kho_fdt)
+	fdt_page = alloc_page(GFP_KERNEL);
+	if (!fdt_page)
 		return -ENOMEM;
 
-	fdt = page_to_virt(kho_fdt);
+	fdt = page_to_virt(fdt_page);
 
 	err |= fdt_create(fdt, PAGE_SIZE);
 	err |= fdt_finish_reservemap(fdt);
@@ -2555,6 +2522,7 @@ static int __init prepare_kho_fdt(void)
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
 
+		err |= kho_preserve_phys(map->start, map->size);
 		err |= fdt_begin_node(fdt, map->name);
 		err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
 		err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
@@ -2562,13 +2530,14 @@ static int __init prepare_kho_fdt(void)
 		err |= fdt_end_node(fdt);
 	}
 	err |= fdt_end_node(fdt);
-
 	err |= fdt_finish(fdt);
 
+	err |= kho_preserve_folio(page_folio(fdt_page));
+	err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
+
 	if (err) {
 		pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
-		put_page(kho_fdt);
-		kho_fdt = NULL;
+		put_page(fdt_page);
 	}
 
 	return err;
@@ -2584,13 +2553,6 @@ static int __init reserve_mem_init(void)
 	err = prepare_kho_fdt();
 	if (err)
 		return err;
-
-	err = register_kho_notifier(&reserve_mem_kho_nb);
-	if (err) {
-		put_page(kho_fdt);
-		kho_fdt = NULL;
-	}
-
 	return err;
 }
 late_initcall(reserve_mem_init);
-- 
2.47.2




> Pasha
> 

-- 
Sincerely yours,
Mike.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ