[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <d843596e-0def-439b-966a-a0f10a1b7f6d@amazon.com>
Date: Tue, 19 Dec 2023 00:01:02 +0100
From: Alexander Graf <graf@...zon.com>
To: Rob Herring <robh@...nel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-trace-kernel@...r.kernel.org>,
	<linux-mm@...ck.org>, <devicetree@...r.kernel.org>,
	<linux-arm-kernel@...ts.infradead.org>, <kexec@...ts.infradead.org>,
	<linux-doc@...r.kernel.org>, <x86@...nel.org>, Eric Biederman
	<ebiederm@...ssion.com>, "H. Peter Anvin" <hpa@...or.com>, Andy Lutomirski
	<luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>, Steven Rostedt
	<rostedt@...dmis.org>, Andrew Morton <akpm@...ux-foundation.org>, "Mark
 Rutland" <mark.rutland@....com>, Tom Lendacky <thomas.lendacky@....com>,
	Ashish Kalra <ashish.kalra@....com>, James Gowans <jgowans@...zon.com>,
	Stanislav Kinsburskii <skinsburskii@...ux.microsoft.com>, <arnd@...db.de>,
	<pbonzini@...hat.com>, <madvenka@...ux.microsoft.com>, Anthony Yznaga
	<anthony.yznaga@...cle.com>, Usama Arif <usama.arif@...edance.com>, "David
 Woodhouse" <dwmw@...zon.co.uk>, Benjamin Herrenschmidt
	<benh@...nel.crashing.org>
Subject: Re: [PATCH 06/15] arm64: Add KHO support
Hey Rob!
On 14.12.23 23:36, Rob Herring wrote:
> On Wed, Dec 13, 2023 at 12:04:43AM +0000, Alexander Graf wrote:
>> We now have all bits in place to support KHO kexecs. This patch adds
>> awareness of KHO in the kexec file as well as boot path for arm64 and
>> adds the respective kconfig option to the architecture so that it can
>> use KHO successfully.
>>
>> Signed-off-by: Alexander Graf <graf@...zon.com>
>> ---
>>   arch/arm64/Kconfig        | 12 ++++++++++++
>>   arch/arm64/kernel/setup.c |  2 ++
>>   arch/arm64/mm/init.c      |  8 ++++++++
>>   drivers/of/fdt.c          | 41 +++++++++++++++++++++++++++++++++++++++
>>   drivers/of/kexec.c        | 36 ++++++++++++++++++++++++++++++++++
>>   5 files changed, 99 insertions(+)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 7b071a00425d..1ba338ce7598 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -1501,6 +1501,18 @@ config ARCH_SUPPORTS_CRASH_DUMP
>>   config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
>>        def_bool CRASH_CORE
>>
>> +config KEXEC_KHO
>> +     bool "kexec handover"
>> +     depends on KEXEC
>> +     select MEMBLOCK_SCRATCH
>> +     select LIBFDT
>> +     select CMA
>> +     help
>> +       Allow kexec to hand over state across kernels by generating and
>> +       passing additional metadata to the target kernel. This is useful
>> +       to keep data or state alive across the kexec. For this to work,
>> +       both source and target kernels need to have this option enabled.
> Why do we have the same kconfig entry twice? Here and x86.
This was how the kexec config options were done when I originally wrote 
the patches. Since then, it looks like Eric DeVolder has cleaned things up 
quite nicely. I'll adapt to the new way.
>
>> +
>>   config TRANS_TABLE
>>        def_bool y
>>        depends on HIBERNATION || KEXEC_CORE
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index 417a8a86b2db..8035b673d96d 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -346,6 +346,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
>>
>>        paging_init();
>>
>> +     kho_reserve_mem();
>> +
>>        acpi_table_upgrade();
>>
>>        /* Parse the ACPI tables for possible boot-time configuration */
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 74c1db8ce271..254d82f3383a 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -358,6 +358,8 @@ void __init bootmem_init(void)
>>         */
>>        arch_reserve_crashkernel();
>>
>> +     kho_reserve();
>> +
> reserve what? It is not obvious what the difference between
> kho_reserve_mem() and kho_reserve() are.
Yeah, I agree. I was struggling to find good names for them. What they 
do is:
kho_reserve() - Reserve CMA memory for later kexec. We use this memory 
region as scratch memory later.
kho_reserve_mem() - Post-KHO. Creates memory reservations in memblock 
for the memory that was handed over pre-KHO.
For v2, I'll change them to kho_reserve_scratch() and 
kho_reserve_previous_mem() unless you have better ideas :)
>
>>        memblock_dump_all();
>>   }
>>
>> @@ -386,6 +388,12 @@ void __init mem_init(void)
>>        /* this will put all unused low memory onto the freelists */
>>        memblock_free_all();
>>
>> +     /*
>> +      * Now that all KHO pages are marked as reserved, let's flip them back
>> +      * to normal pages with accurate refcount.
>> +      */
>> +     kho_populate_refcount();
>> +
>>        /*
>>         * Check boundaries twice: Some fundamental inconsistencies can be
>>         * detected at build time already.
>> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
>> index bf502ba8da95..af95139351ed 100644
>> --- a/drivers/of/fdt.c
>> +++ b/drivers/of/fdt.c
>> @@ -1006,6 +1006,44 @@ void __init early_init_dt_check_for_usable_mem_range(void)
>>                memblock_add(rgn[i].base, rgn[i].size);
>>   }
>>
>> +/**
>> + * early_init_dt_check_kho - Decode info required for kexec handover from DT
>> + */
>> +void __init early_init_dt_check_kho(void)
>> +{
>> +#ifdef CONFIG_KEXEC_KHO
> if (!IS_ENABLED(CONFIG_KEXEC_KHO))
>    return;
>
> You'll need a kho_populate() stub.
Always happy to remove #ifdefs :)
>
>> +     unsigned long node = chosen_node_offset;
>> +     u64 kho_start, scratch_start, scratch_size, mem_start, mem_size;
>> +     const __be32 *p;
>> +     int l;
>> +
>> +     if ((long)node < 0)
>> +             return;
>> +
>> +     p = of_get_flat_dt_prop(node, "linux,kho-dt", &l);
>> +     if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
>> +             return;
>> +
>> +     kho_start = dt_mem_next_cell(dt_root_addr_cells, &p);
>> +
>> +     p = of_get_flat_dt_prop(node, "linux,kho-scratch", &l);
>> +     if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
>> +             return;
>> +
>> +     scratch_start = dt_mem_next_cell(dt_root_addr_cells, &p);
>> +     scratch_size = dt_mem_next_cell(dt_root_addr_cells, &p);
>> +
>> +     p = of_get_flat_dt_prop(node, "linux,kho-mem", &l);
>> +     if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
>> +             return;
>> +
>> +     mem_start = dt_mem_next_cell(dt_root_addr_cells, &p);
>> +     mem_size = dt_mem_next_cell(dt_root_addr_cells, &p);
>> +
>> +     kho_populate(kho_start, scratch_start, scratch_size, mem_start, mem_size);
>> +#endif
>> +}
>> +
>>   #ifdef CONFIG_SERIAL_EARLYCON
>>
>>   int __init early_init_dt_scan_chosen_stdout(void)
>> @@ -1304,6 +1342,9 @@ void __init early_init_dt_scan_nodes(void)
>>
>>        /* Handle linux,usable-memory-range property */
>>        early_init_dt_check_for_usable_mem_range();
>> +
>> +     /* Handle kexec handover */
>> +     early_init_dt_check_kho();
>>   }
>>
>>   bool __init early_init_dt_scan(void *params)
>> diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
>> index 68278340cecf..a612e6bb8c75 100644
>> --- a/drivers/of/kexec.c
>> +++ b/drivers/of/kexec.c
>> @@ -264,6 +264,37 @@ static inline int setup_ima_buffer(const struct kimage *image, void *fdt,
>>   }
>>   #endif /* CONFIG_IMA_KEXEC */
>>
>> +static int kho_add_chosen(const struct kimage *image, void *fdt, int chosen_node)
>> +{
>> +     int ret = 0;
>> +
>> +#ifdef CONFIG_KEXEC_KHO
> ditto
>
> Though perhaps image->kho is not defined?
Correct, it is not. But I'm happy to add a few local variables, filled 
from the image->kho contents inside an #ifdef, so that we can at least 
compile-check all libfdt invocations.
Alex
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Powered by blists - more mailing lists
 
