[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2fd31764-b341-638a-f70e-43dc0d5b0092@oracle.com>
Date: Tue, 14 Mar 2023 08:28:53 -0500
From: Eric DeVolder <eric.devolder@...cle.com>
To: Baoquan He <bhe@...hat.com>
Cc: linux-kernel@...r.kernel.org, x86@...nel.org,
kexec@...ts.infradead.org, ebiederm@...ssion.com,
dyoung@...hat.com, vgoyal@...hat.com, tglx@...utronix.de,
mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
hpa@...or.com, nramas@...ux.microsoft.com, thomas.lendacky@....com,
robh@...nel.org, efault@....de, rppt@...nel.org, david@...hat.com,
sourabhjain@...ux.ibm.com, konrad.wilk@...cle.com,
boris.ostrovsky@...cle.com
Subject: Re: [PATCH v19 2/7] crash: add generic infrastructure for crash
hotplug support
On 3/14/23 05:43, Baoquan He wrote:
> On 03/06/23 at 11:22am, Eric DeVolder wrote:
> ......
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> +#undef pr_fmt
>> +#define pr_fmt(fmt) "crash hp: " fmt
>> +/*
>> + * To accurately reflect hot un/plug changes of cpu and memory resources
>> + * (including onlining and offlining of those resources), the elfcorehdr
>> + * (which is passed to the crash kernel via the elfcorehdr= parameter)
>> + * must be updated with the new list of CPUs and memories.
>> + *
>> + * In order to make changes to elfcorehdr, two conditions are needed:
>> + * First, the segment containing the elfcorehdr must be large enough
>> + * to permit a growing number of resources; the elfcorehdr memory size
>> + * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
>> + * Second, purgatory must explicitly exclude the elfcorehdr from the
>> + * list of segments it checks (since the elfcorehdr changes and thus
>> + * would require an update to purgatory itself to update the digest).
>> + */
>> +static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>> +{
>> + /* Obtain lock while changing crash information */
>> + if (kexec_trylock()) {
>> +
>> + /* Check kdump is loaded */
>> + if (kexec_crash_image) {
>
> If the above check fails, I would directly return or jump out, because
> that way one level of indentation can be removed.
Baoquan, ok, I'll change that in next version.
>
>> + struct kimage *image = kexec_crash_image;
>> +
>> + if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>> + hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>> + pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
>> + else
>> + pr_debug("hp_action %u\n", hp_action);
>> +
>> + /*
>> + * When the struct kimage is allocated, the elfcorehdr_index
>> + * is set to -1. Find the segment containing the elfcorehdr,
>> + * if not already found. This works for both the kexec_load
>> + * and kexec_file_load paths.
>> + */
>> + if (image->elfcorehdr_index < 0) {
>> + unsigned long mem;
>> + unsigned char *ptr;
>> + unsigned int n;
>> +
>> + for (n = 0; n < image->nr_segments; n++) {
>> + mem = image->segment[n].mem;
>> + ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
>> + if (ptr) {
>> + /* The segment containing elfcorehdr */
>> + if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
>> + image->elfcorehdr_index = (int)n;
>> + }
>> + kunmap_local(ptr);
>> + }
>> + }
>> + }
>> +
>> + if (image->elfcorehdr_index < 0) {
>> + pr_err("unable to locate elfcorehdr segment");
>> + goto out;
>> + }
>> +
>> + /* Needed in order for the segments to be updated */
>> + arch_kexec_unprotect_crashkres();
>> +
>> + /* Differentiate between normal load and hotplug update */
>> + image->hp_action = hp_action;
>> +
>> + /* Now invoke arch-specific update handler */
>> + arch_crash_handle_hotplug_event(image);
>> +
>> + /* No longer handling a hotplug event */
>> + image->hp_action = KEXEC_CRASH_HP_NONE;
>> + image->elfcorehdr_updated = true;
>> +
>> + /* Change back to read-only */
>> + arch_kexec_protect_crashkres();
>> + }
>> +
>> +out:
>> + /* Release lock now that update complete */
>> + kexec_unlock();
>> + }
>> +}
>> +
>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>> +{
>> + switch (val) {
>> + case MEM_ONLINE:
>> + crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
>> + KEXEC_CRASH_HP_INVALID_CPU);
>> + break;
>> +
>> + case MEM_OFFLINE:
>> + crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
>> + KEXEC_CRASH_HP_INVALID_CPU);
>> + break;
>> + }
>> + return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block crash_memhp_nb = {
>> + .notifier_call = crash_memhp_notifier,
>> + .priority = 0
>> +};
>> +
>
> Because for_each_possible_cpu() is used in
> crash_prepare_elf64_headers(), x86 doesn't need to respond to cpu
> hotplug, or doesn't do anything for it with this patchset. The cpu part
> of this infrastructure is only for the later powerpc usage, right?
That is true, yes.
>
>> +static int crash_cpuhp_online(unsigned int cpu)
>> +{
>> + crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
>> + return 0;
>> +}
>> +
>> +static int crash_cpuhp_offline(unsigned int cpu)
>> +{
>> + crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
>> + return 0;
>> +}
>> +
>> +static int __init crash_hotplug_init(void)
>> +{
>> + int result = 0;
>> +
>> + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
>> + register_memory_notifier(&crash_memhp_nb);
>> +
>> + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
>> + result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
>> + "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
>> + }
>> +
>> + return result;
>> +}
>> +
>> +subsys_initcall(crash_hotplug_init);
>> +#endif
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index 969e8f52f7da..f2f9d41ce5df 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -276,6 +276,11 @@ struct kimage *do_kimage_alloc_init(void)
>> /* Initialize the list of unusable pages */
>> INIT_LIST_HEAD(&image->unusable_pages);
>>
>> +#ifdef CONFIG_CRASH_HOTPLUG
>> + image->elfcorehdr_index = -1;
>> + image->elfcorehdr_updated = false;
>> +#endif
>> +
>> return image;
>> }
>>
>> --
>> 2.31.1
>>
>
Powered by blists - more mailing lists