linux-kernel - Re: [PATCH v14 7/7] x86/crash: add x86 crash hotplug support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <c15e968f-c23f-b41f-1fa3-10ad16f6fb35@oracle.com>
Date:   Thu, 8 Dec 2022 14:34:47 -0600
From:   Eric DeVolder <eric.devolder@...cle.com>
To:     Borislav Petkov <bp@...en8.de>
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org,
        kexec@...ts.infradead.org, ebiederm@...ssion.com,
        dyoung@...hat.com, bhe@...hat.com, vgoyal@...hat.com,
        tglx@...utronix.de, mingo@...hat.com, dave.hansen@...ux.intel.com,
        hpa@...or.com, nramas@...ux.microsoft.com, thomas.lendacky@....com,
        robh@...nel.org, efault@....de, rppt@...nel.org, david@...hat.com,
        sourabhjain@...ux.ibm.com, konrad.wilk@...cle.com,
        boris.ostrovsky@...cle.com
Subject: Re: [PATCH v14 7/7] x86/crash: add x86 crash hotplug support



On 12/7/22 04:19, Borislav Petkov wrote:
> On Wed, Nov 16, 2022 at 04:46:43PM -0500, Eric DeVolder wrote:
>> When CPU or memory is hot un/plugged, the crash elfcorehdr, which
>> describes the CPUs and memory in the system, must also be updated.
>>
>> A new elfcorehdr is generated from the available CPUs and memory
>> into a buffer, and then installed over the top of the existing
>> elfcorehdr. The segment containing the elfcorehdr is identified
>> at run time in crash_core:handle_hotplug_event(), which works for
>> both the kexec_load() and kexec_file_load() syscalls.
>>
>> In the patch 'kexec: exclude elfcorehdr from the segment digest'
>> the need to update purgatory due to the change in elfcorehdr was
>> eliminated.  As a result, no changes to purgatory or boot_params
>> (as the elfcorehdr= kernel command line parameter pointer
>> remains unchanged and correct) are needed, just elfcorehdr.
>>
>> To accommodate a growing number of resources via hotplug, the
>> elfcorehdr segment must be sufficiently large to accommodate
>> changes, see the CRASH_MAX_MEMORY_RANGES description.
>>
>> Signed-off-by: Eric DeVolder <eric.devolder@...cle.com>
>> ---
>>   arch/x86/Kconfig             |   9 +++
>>   arch/x86/include/asm/kexec.h |  15 +++++
>>   arch/x86/kernel/crash.c      | 106 ++++++++++++++++++++++++++++++++++-
>>   3 files changed, 127 insertions(+), 3 deletions(-)
> 
> Some quick cleanups on top; there's potential for more:
> 
> ---
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index ffee99046942..486509030d3a 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -2091,13 +2091,16 @@ config CRASH_DUMP
>   	  For more details see Documentation/admin-guide/kdump/kdump.rst
>   
>   config CRASH_HOTPLUG
> -	bool "kernel updates of crash elfcorehdr"
> +	bool "Update the crash elfcorehdr on system configuration changes"
>   	default n
>   	depends on CRASH_DUMP && (HOTPLUG_CPU || MEMORY_HOTPLUG)
>   	help
> -	  Enable the kernel to directly update the crash elfcorehdr (which
> -	  contains the list of CPUs and memory regions to be dumped upon
> -	  a crash) in response to hot plug/unplug of CPUs or memory.
> +	  Enable direct updates to the crash elfcorehdr (which contains
> +	  the list of CPUs and memory regions to be dumped upon a crash)
> +	  in response to hot plug/unplug of CPUs or memory. This is a much
> +	  more advanced approach than userspace attempting that.
> +
> +	  If unsure, say Y.
Done!

>   
>   config KEXEC_JUMP
>   	bool "kexec jump"
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index d2238bcf8106..d26f208e582e 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -413,25 +413,32 @@ int crash_load_segments(struct kimage *image)
>   	image->elf_headers_sz	= kbuf.bufsz;
>   	kbuf.memsz		= kbuf.bufsz;
>   
> -#ifdef CONFIG_CRASH_HOTPLUG
> -	/* Ensure elfcorehdr segment large enough for hotplug changes */
> -	unsigned long pnum = 2; /* VMCOREINFO and kernel_map */
> -
> -	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
> -		pnum += CONFIG_NR_CPUS_DEFAULT;
> -	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
> -		pnum += CRASH_MAX_MEMORY_RANGES;
> -	if (pnum < (unsigned long)PN_XNUM) {
> -		kbuf.memsz = pnum * sizeof(Elf64_Phdr);
> -		kbuf.memsz += sizeof(Elf64_Ehdr);
> -		image->elfcorehdr_index = image->nr_segments;
> -		image->elfcorehdr_index_valid = true;
> -		/* Mark as usable to crash kernel, else crash kernel fails on boot */
> -		image->elf_headers_sz = kbuf.memsz;
> -	} else {
> -		pr_err("number of Phdrs %lu exceeds max\n", pnum);
> +	if (IS_ENABLED(CONFIG_CRASH_HOTPLUG)) {
> +		/*
> +		 * Ensure the elfcorehdr segment is large enough for hotplug changes.
> +		 * Start with VMCOREINFO and kernel_map:
> +		 */
> +		unsigned long pnum = 2;
> +
> +		if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
> +			pnum += CONFIG_NR_CPUS_DEFAULT;
> +
> +		if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
> +			pnum += CRASH_MAX_MEMORY_RANGES;
> +
> +		if (pnum < (unsigned long)PN_XNUM) {
> +			kbuf.memsz = pnum * sizeof(Elf64_Phdr);
> +			kbuf.memsz += sizeof(Elf64_Ehdr);
> +
> +			image->elfcorehdr_index = image->nr_segments;
> +			image->elfcorehdr_index_valid = true;
> +
> +			/* Mark as usable to crash kernel, else crash kernel fails on boot */
> +			image->elf_headers_sz = kbuf.memsz;
> +		} else {
> +			pr_err("number of Phdrs %lu exceeds max\n", pnum);
> +			}
Done, converted this block to an IS_ENABLED(CONFIG_CRASH_HOTPLUG) check.

>   	}
> -#endif
>   
>   	kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
>   	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
> @@ -463,10 +470,9 @@ int crash_load_segments(struct kimage *image)
>    */
>   void arch_crash_handle_hotplug_event(struct kimage *image)
>   {
> +	void *elfbuf, *old_elfcorehdr;
>   	unsigned long mem, memsz;
> -	unsigned long elfsz = 0;
> -	void *elfbuf = NULL;
> -	void *ptr;
> +	unsigned long elfsz;
Done. Note that I leave elfbuf initialized to NULL: prepare_elf_headers() does not
necessarily write to it, so it could otherwise be used uninitialized.

>   
>   	/*
>   	 * Create the new elfcorehdr reflecting the changes to CPU and/or
> @@ -489,26 +495,24 @@ void arch_crash_handle_hotplug_event(struct kimage *image)
>   		goto out;
>   	}
>   
> -	/*
> -	 * Copy new elfcorehdr over the old elfcorehdr at destination.
> -	 */
> -	ptr = arch_map_crash_pages(mem, memsz);
> -	if (ptr) {
> -		/*
> -		 * Temporarily invalidate the crash image while the
> -		 * elfcorehdr is updated.
> -		 */
> -		xchg(&kexec_crash_image, NULL);
> -		memcpy_flushcache(ptr, elfbuf, elfsz);
> -		xchg(&kexec_crash_image, image);
> -		arch_unmap_crash_pages(ptr);
> -		pr_debug("updated elfcorehdr\n");
> -	} else {
> +	/* Copy new elfcorehdr over the old elfcorehdr at destination. */
> +	old_elfcorehdr = arch_map_crash_pages(mem, memsz);
> +	if (!old_elfcorehdr) {
>   		pr_err("updating elfcorehdr failed\n");
> +		goto out;
>   	}
Done, changed to old_elfcorehdr and error/early-out.

>   
> +	/*
> +	 * Temporarily invalidate the crash image while the elfcorehdr
> +	 * is updated.
> +	 * */
> +	xchg(&kexec_crash_image, NULL);
> +	memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz);
> +	xchg(&kexec_crash_image, image);
> +	arch_unmap_crash_pages(old_elfcorehdr);
> +	pr_debug("updated elfcorehdr\n");
> +
>   out:
> -	if (elfbuf)
> -		vfree(elfbuf);
> +	vfree(elfbuf);
>   }
>   #endif
> 
Done.

Thanks!
eric