Message-ID: <5dc4745c-4608-a070-d8a8-6afb6f9b14a9@amd.com>
Date: Mon, 21 Jul 2025 09:40:57 -0500
From: Tom Lendacky <thomas.lendacky@....com>
To: Kai Huang <kai.huang@...el.com>, dave.hansen@...el.com, bp@...en8.de,
 tglx@...utronix.de, peterz@...radead.org, mingo@...hat.com, hpa@...or.com
Cc: x86@...nel.org, kas@...nel.org, rick.p.edgecombe@...el.com,
 dwmw@...zon.co.uk, linux-kernel@...r.kernel.org, pbonzini@...hat.com,
 seanjc@...gle.com, kvm@...r.kernel.org, reinette.chatre@...el.com,
 isaku.yamahata@...el.com, dan.j.williams@...el.com, ashish.kalra@....com,
 nik.borisov@...e.com, chao.gao@...el.com, sagis@...gle.com
Subject: Re: [PATCH v4 1/7] x86/kexec: Consolidate relocate_kernel() function
 parameters

On 7/17/25 16:46, Kai Huang wrote:
> During kexec, the kernel jumps to the new kernel in relocate_kernel(),
> which is implemented in assembly; 32-bit and 64-bit each have their own
> version.
> 
> Currently, for both 32-bit and 64-bit, the last two parameters of
> relocate_kernel() are 'unsigned int', but each only conveys a boolean,
> i.e., one bit of information.  A single 'unsigned int' has enough space
> to carry both bits, so there's no need to pass the two booleans in two
> separate 'unsigned int' parameters.
> 
> Consolidate the last two function parameters of relocate_kernel() into a
> single 'unsigned int' and pass flags instead.
> 
> Only consolidate the 64-bit version, although a similar optimization
> could be done for the 32-bit version too.  Don't bother changing the
> 32-bit version while it is working (since an assembly code change would
> be required).
> 
> Signed-off-by: Kai Huang <kai.huang@...el.com>
> ---
>  arch/x86/include/asm/kexec.h         | 12 ++++++++++--
>  arch/x86/kernel/machine_kexec_64.c   | 22 +++++++++++++---------
>  arch/x86/kernel/relocate_kernel_64.S | 19 +++++++++----------
>  3 files changed, 32 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index f2ad77929d6e..5f09791dc4e9 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -13,6 +13,15 @@
>  # define KEXEC_DEBUG_EXC_HANDLER_SIZE	6 /* PUSHI, PUSHI, 2-byte JMP */
>  #endif
>  
> +#ifdef CONFIG_X86_64
> +
> +#include <linux/bits.h>
> +
> +#define RELOC_KERNEL_PRESERVE_CONTEXT	BIT(0)
> +#define RELOC_KERNEL_HOST_MEM_ACTIVE	BIT(1)

This isn't as descriptive with "ENC" removed from the name. Reading it,
you'd almost think the flag should always be set, because the kernel
always has host memory active.
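
Just as an illustration (the name is only a suggestion), keeping the
encryption aspect visible would make the intent clearer at the test sites:

	#define RELOC_KERNEL_HOST_MEM_ENC_ACTIVE	BIT(1)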

> +
> +#endif
> +
>  # define KEXEC_CONTROL_PAGE_SIZE	4096
>  # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
>  
> @@ -121,8 +130,7 @@ typedef unsigned long
>  relocate_kernel_fn(unsigned long indirection_page,
>  		   unsigned long pa_control_page,
>  		   unsigned long start_address,
> -		   unsigned int preserve_context,
> -		   unsigned int host_mem_enc_active);
> +		   unsigned int flags);
>  #endif
>  extern relocate_kernel_fn relocate_kernel;
>  #define ARCH_HAS_KIMAGE_ARCH
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index 697fb99406e6..25cff38f5e60 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -384,16 +384,10 @@ void __nocfi machine_kexec(struct kimage *image)
>  {
>  	unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
>  	relocate_kernel_fn *relocate_kernel_ptr;
> -	unsigned int host_mem_enc_active;
> +	unsigned int relocate_kernel_flags;
>  	int save_ftrace_enabled;
>  	void *control_page;
>  
> -	/*
> -	 * This must be done before load_segments() since if call depth tracking
> -	 * is used then GS must be valid to make any function calls.
> -	 */
> -	host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
> -
>  #ifdef CONFIG_KEXEC_JUMP
>  	if (image->preserve_context)
>  		save_processor_state();
> @@ -427,6 +421,17 @@ void __nocfi machine_kexec(struct kimage *image)
>  	 */
>  	relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start;
>  
> +	relocate_kernel_flags = 0;
> +	if (image->preserve_context)
> +		relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT;
> +
> +	/*
> +	 * This must be done before load_segments() since if call depth tracking
> +	 * is used then GS must be valid to make any function calls.
> +	 */
> +	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
> +		relocate_kernel_flags |= RELOC_KERNEL_HOST_MEM_ACTIVE;
> +
>  	/*
>  	 * The segment registers are funny things, they have both a
>  	 * visible and an invisible part.  Whenever the visible part is
> @@ -443,8 +448,7 @@ void __nocfi machine_kexec(struct kimage *image)
>  	image->start = relocate_kernel_ptr((unsigned long)image->head,
>  					   virt_to_phys(control_page),
>  					   image->start,
> -					   image->preserve_context,
> -					   host_mem_enc_active);
> +					   relocate_kernel_flags);
>  
>  #ifdef CONFIG_KEXEC_JUMP
>  	if (image->preserve_context)
> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
> index ea604f4d0b52..1dfa323b33d5 100644
> --- a/arch/x86/kernel/relocate_kernel_64.S
> +++ b/arch/x86/kernel/relocate_kernel_64.S
> @@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
>  	 * %rdi indirection_page
>  	 * %rsi pa_control_page
>  	 * %rdx start address
> -	 * %rcx preserve_context
> -	 * %r8  host_mem_enc_active
> +	 * %rcx flags: RELOC_KERNEL_*
>  	 */
>  
>  	/* Save the CPU context, used for jumping back */
> @@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
>  	/* save indirection list for jumping back */
>  	movq	%rdi, pa_backup_pages_map(%rip)
>  
> -	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
> +	/* Save the flags to %r11 as swap_pages clobbers %rcx. */
>  	movq	%rcx, %r11
>  
>  	/* setup a new stack at the end of the physical control page */
> @@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
>  	/*
>  	 * %rdi	indirection page
>  	 * %rdx start address
> -	 * %r8 host_mem_enc_active
>  	 * %r9 page table page
> -	 * %r11 preserve_context
> +	 * %r11 flags: RELOC_KERNEL_*
>  	 * %r13 original CR4 when relocate_kernel() was invoked
>  	 */
>  
> @@ -204,7 +202,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
>  	 * entries that will conflict with the now unencrypted memory
>  	 * used by kexec. Flush the caches before copying the kernel.
>  	 */
> -	testq	%r8, %r8
> +	testq	$RELOC_KERNEL_HOST_MEM_ACTIVE, %r11

Hmmm... can't both bits be set at the same time? If so, then this will
fail. This should be doing bit tests now.
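
Something like this (just an untested sketch, assuming the BIT(1)
definition above) would make it an explicit single-bit test:

	btq	$1, %r11	/* CF = RELOC_KERNEL_HOST_MEM_ACTIVE bit */
	jnc	.Lsme_off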

>  	jz .Lsme_off
>  	wbinvd
>  .Lsme_off:
> @@ -220,7 +218,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
>  	movq	%cr3, %rax
>  	movq	%rax, %cr3
>  
> -	testq	%r11, %r11	/* preserve_context */
> +	testq	$RELOC_KERNEL_PRESERVE_CONTEXT, %r11
>  	jnz .Lrelocate
>  
>  	/*
> @@ -273,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
>  	ANNOTATE_NOENDBR
>  	andq	$PAGE_MASK, %r8
>  	lea	PAGE_SIZE(%r8), %rsp
> -	movl	$1, %r11d	/* Ensure preserve_context flag is set */
> +	movl	$RELOC_KERNEL_PRESERVE_CONTEXT, %r11d	/* Ensure preserve_context flag is set */

And this will clear any other bits that were in %r11 rather than just
setting a single bit. Not sure it currently has any effect, because %r8
(where the memory encryption setting was held) is modified just before
this, but if any bits are added in the future that are needed past this
point, this will be a problem.
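
Something along these lines (untested sketch) would set the bit without
clobbering the rest of the flags:

	orq	$RELOC_KERNEL_PRESERVE_CONTEXT, %r11	/* Ensure preserve_context flag is set */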

>  	call	swap_pages
>  	movq	kexec_va_control_page(%rip), %rax
>  0:	addq	$virtual_mapped - 0b, %rax
> @@ -321,7 +319,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
>  	UNWIND_HINT_END_OF_STACK
>  	/*
>  	 * %rdi indirection page
> -	 * %r11 preserve_context
> +	 * %r11 flags: RELOC_KERNEL_*
>  	 */
>  	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
>  	xorl	%edi, %edi
> @@ -357,7 +355,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
>  	movq	%rdi, %rdx    /* Save destination page to %rdx */
>  	movq	%rsi, %rax    /* Save source page to %rax */
>  
> -	testq	%r11, %r11    /* Only actually swap for ::preserve_context */
> +	/* Only actually swap for ::preserve_context */
> +	testq	$RELOC_KERNEL_PRESERVE_CONTEXT, %r11

Ditto here on the bit testing.

Thanks,
Tom

>  	jz	.Lnoswap
>  
>  	/* copy source page to swap page */
