lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
 <SN6PR02MB4157ACEFEFE3E856CB93BC1AD4822@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Thu, 1 May 2025 02:36:00 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Peter Zijlstra <peterz@...radead.org>, "x86@...nel.org" <x86@...nel.org>
CC: "kys@...rosoft.com" <kys@...rosoft.com>, "haiyangz@...rosoft.com"
	<haiyangz@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
	"decui@...rosoft.com" <decui@...rosoft.com>, "tglx@...utronix.de"
	<tglx@...utronix.de>, "mingo@...hat.com" <mingo@...hat.com>, "bp@...en8.de"
	<bp@...en8.de>, "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
	"hpa@...or.com" <hpa@...or.com>, "seanjc@...gle.com" <seanjc@...gle.com>,
	"pbonzini@...hat.com" <pbonzini@...hat.com>, "ardb@...nel.org"
	<ardb@...nel.org>, "kees@...nel.org" <kees@...nel.org>, Arnd Bergmann
	<arnd@...db.de>, "gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
	"jpoimboe@...nel.org" <jpoimboe@...nel.org>, "linux-hyperv@...r.kernel.org"
	<linux-hyperv@...r.kernel.org>, "linux-kernel@...r.kernel.org"
	<linux-kernel@...r.kernel.org>, "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
	"linux-efi@...r.kernel.org" <linux-efi@...r.kernel.org>,
	"samitolvanen@...gle.com" <samitolvanen@...gle.com>, "ojeda@...nel.org"
	<ojeda@...nel.org>
Subject: RE: [PATCH v2 11/13] x86,hyperv: Clean up hv_do_hypercall()

From: Peter Zijlstra <peterz@...radead.org> Sent: Wednesday, April 30, 2025 4:08 AM
> 
> What used to be a simple few instructions has turned into a giant mess
> (for x86_64). Not only does it use static_branch wrong, it mixes it
> with dynamic branches for no apparent reason.
> 
> Notably it uses static_branch through an out-of-line function call,
> which completely defeats the purpose, since instead of a simple
> JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> absolutely idiotic.
> 
> Add to that a dynamic test of hyperv_paravisor_present, something
> which is set once and never changed.
> 
> Replace all this idiocy with a single direct function call to the
> right hypercall variant.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>

I've done these tests on Hyper-V VMs with this patch series. My focus
is the Hyper-V changes in Patches 11 and 12, not the other changes.

* Normal VM boot and basic smoke test
* TDX and SEV-SNP VMs boot and basic smoke test. These VMs have
   a paravisor.
* Normal VM taking a panic and running the kdump kernel
* Normal VM suspending for hibernation, then resuming from
   hibernation
* Verified that IBT is enabled in a normal VM. It's not offered in a TDX
   VM on Hyper-V when a paravisor is used. I don't know about the case
   without a paravisor.
* Building a 64-bit kernel with and without CONFIG_AMD_MEM_ENCRYPT
   and CONFIG_INTEL_TDX_GUEST.
* Building a 32-bit kernel (but I did not try to run it)

TDX and SEV-SNP VMs without a paravisor are not tested, so neither the
static call update nor the new direct call path has been tested for
TDX and SNP hypercalls. I don't have a hardware environment where I
can test without a paravisor.

Tested-by: Michael Kelley <mhklinux@...look.com>
Reviewed-by: Michael Kelley <mhklinux@...look.com>

> ---
>  arch/x86/hyperv/hv_init.c       |   20 +++++
>  arch/x86/hyperv/ivm.c           |   15 ++++
>  arch/x86/include/asm/mshyperv.h |  137 +++++++++++-----------------------------
>  arch/x86/kernel/cpu/mshyperv.c  |   19 +++--
>  4 files changed, 89 insertions(+), 102 deletions(-)
> 
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -35,7 +35,27 @@
>  #include <linux/highmem.h>
> 
>  void *hv_hypercall_pg;
> +
> +#ifdef CONFIG_X86_64
> +u64 hv_std_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	u64 hv_status;
> +
> +	if (!hv_hypercall_pg)
> +		return U64_MAX;
> +
> +	register u64 __r8 asm("r8") = param2;
> +	asm volatile (CALL_NOSPEC
> +		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +		        "+c" (control), "+d" (param1), "+r" (__r8)
> +		      : THUNK_TARGET(hv_hypercall_pg)
> +		      : "cc", "memory", "r9", "r10", "r11");
> +
> +	return hv_status;
> +}
> +#else
>  EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> +#endif
> 
>  union hv_ghcb * __percpu *hv_ghcb_pg;
> 
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -376,9 +376,23 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon
>  	return ret;
>  }
> 
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	u64 hv_status;
> +
> +	register u64 __r8 asm("r8") = param2;
> +	asm volatile("vmmcall"
> +		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +		       "+c" (control), "+d" (param1), "+r" (__r8)
> +		     : : "cc", "memory", "r9", "r10", "r11");
> +
> +	return hv_status;
> +}
> +
>  #else
>  static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
>  static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
>  #endif /* CONFIG_AMD_MEM_ENCRYPT */
> 
>  #ifdef CONFIG_INTEL_TDX_GUEST
> @@ -428,6 +442,7 @@ u64 hv_tdx_hypercall(u64 control, u64 pa
>  #else
>  static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
>  static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
>  #endif /* CONFIG_INTEL_TDX_GUEST */
> 
>  #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -6,6 +6,7 @@
>  #include <linux/nmi.h>
>  #include <linux/msi.h>
>  #include <linux/io.h>
> +#include <linux/static_call.h>
>  #include <asm/nospec-branch.h>
>  #include <asm/paravirt.h>
>  #include <hyperv/hvhdk.h>
> @@ -38,16 +39,21 @@ static inline unsigned char hv_get_nmi_r
>  	return 0;
>  }
> 
> -#if IS_ENABLED(CONFIG_HYPERV)
> -extern bool hyperv_paravisor_present;
> +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
> 
> +#if IS_ENABLED(CONFIG_HYPERV)
>  extern void *hv_hypercall_pg;
> 
>  extern union hv_ghcb * __percpu *hv_ghcb_pg;
> 
>  bool hv_isolation_type_snp(void);
>  bool hv_isolation_type_tdx(void);
> -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +#ifdef CONFIG_X86_64
> +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +#endif
> 
>  /*
>   * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> @@ -64,37 +70,15 @@ static inline u64 hv_do_hypercall(u64 co
>  {
>  	u64 input_address = input ? virt_to_phys(input) : 0;
>  	u64 output_address = output ? virt_to_phys(output) : 0;
> -	u64 hv_status;
> 
>  #ifdef CONFIG_X86_64
> -	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -		return hv_tdx_hypercall(control, input_address, output_address);
> -
> -	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -		__asm__ __volatile__("mov %[output_address], %%r8\n"
> -				     "vmmcall"
> -				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -				       "+c" (control), "+d" (input_address)
> -				     : [output_address] "r" (output_address)
> -				     : "cc", "memory", "r8", "r9", "r10", "r11");
> -		return hv_status;
> -	}
> -
> -	if (!hv_hypercall_pg)
> -		return U64_MAX;
> -
> -	__asm__ __volatile__("mov %[output_address], %%r8\n"
> -			     CALL_NOSPEC
> -			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -			       "+c" (control), "+d" (input_address)
> -			     : [output_address] "r" (output_address),
> -			       THUNK_TARGET(hv_hypercall_pg)
> -			     : "cc", "memory", "r8", "r9", "r10", "r11");
> +	return static_call_mod(hv_hypercall)(control, input_address, output_address);
>  #else
>  	u32 input_address_hi = upper_32_bits(input_address);
>  	u32 input_address_lo = lower_32_bits(input_address);
>  	u32 output_address_hi = upper_32_bits(output_address);
>  	u32 output_address_lo = lower_32_bits(output_address);
> +	u64 hv_status;
> 
>  	if (!hv_hypercall_pg)
>  		return U64_MAX;
> @@ -107,8 +91,8 @@ static inline u64 hv_do_hypercall(u64 co
>  			       "D"(output_address_hi), "S"(output_address_lo),
>  			       THUNK_TARGET(hv_hypercall_pg)
>  			     : "cc", "memory");
> -#endif /* !x86_64 */
>  	return hv_status;
> +#endif /* !x86_64 */
>  }
> 
>  /* Hypercall to the L0 hypervisor */
> @@ -120,41 +104,23 @@ static inline u64 hv_do_nested_hypercall
>  /* Fast hypercall with 8 bytes of input and no output */
>  static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
>  {
> -	u64 hv_status;
> -
>  #ifdef CONFIG_X86_64
> -	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -		return hv_tdx_hypercall(control, input1, 0);
> -
> -	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -		__asm__ __volatile__(
> -				"vmmcall"
> -				: "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -				"+c" (control), "+d" (input1)
> -				:: "cc", "r8", "r9", "r10", "r11");
> -	} else {
> -		__asm__ __volatile__(CALL_NOSPEC
> -				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -				       "+c" (control), "+d" (input1)
> -				     : THUNK_TARGET(hv_hypercall_pg)
> -				     : "cc", "r8", "r9", "r10", "r11");
> -	}
> +	return static_call_mod(hv_hypercall)(control, input1, 0);
>  #else
> -	{
> -		u32 input1_hi = upper_32_bits(input1);
> -		u32 input1_lo = lower_32_bits(input1);
> -
> -		__asm__ __volatile__ (CALL_NOSPEC
> -				      : "=A"(hv_status),
> -					"+c"(input1_lo),
> -					ASM_CALL_CONSTRAINT
> -				      :	"A" (control),
> -					"b" (input1_hi),
> -					THUNK_TARGET(hv_hypercall_pg)
> -				      : "cc", "edi", "esi");
> -	}
> -#endif
> +	u32 input1_hi = upper_32_bits(input1);
> +	u32 input1_lo = lower_32_bits(input1);
> +	u64 hv_status;
> +
> +	__asm__ __volatile__ (CALL_NOSPEC
> +			      : "=A"(hv_status),
> +			      "+c"(input1_lo),
> +			      ASM_CALL_CONSTRAINT
> +			      :	"A" (control),
> +			      "b" (input1_hi),
> +			      THUNK_TARGET(hv_hypercall_pg)
> +			      : "cc", "edi", "esi");
>  	return hv_status;
> +#endif
>  }
> 
>  static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> @@ -174,45 +140,24 @@ static inline u64 hv_do_fast_nested_hype
>  /* Fast hypercall with 16 bytes of input */
>  static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
>  {
> -	u64 hv_status;
> -
>  #ifdef CONFIG_X86_64
> -	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -		return hv_tdx_hypercall(control, input1, input2);
> -
> -	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -		__asm__ __volatile__("mov %[input2], %%r8\n"
> -				     "vmmcall"
> -				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -				       "+c" (control), "+d" (input1)
> -				     : [input2] "r" (input2)
> -				     : "cc", "r8", "r9", "r10", "r11");
> -	} else {
> -		__asm__ __volatile__("mov %[input2], %%r8\n"
> -				     CALL_NOSPEC
> -				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -				       "+c" (control), "+d" (input1)
> -				     : [input2] "r" (input2),
> -				       THUNK_TARGET(hv_hypercall_pg)
> -				     : "cc", "r8", "r9", "r10", "r11");
> -	}
> +	return static_call_mod(hv_hypercall)(control, input1, input2);
>  #else
> -	{
> -		u32 input1_hi = upper_32_bits(input1);
> -		u32 input1_lo = lower_32_bits(input1);
> -		u32 input2_hi = upper_32_bits(input2);
> -		u32 input2_lo = lower_32_bits(input2);
> -
> -		__asm__ __volatile__ (CALL_NOSPEC
> -				      : "=A"(hv_status),
> -					"+c"(input1_lo), ASM_CALL_CONSTRAINT
> -				      :	"A" (control), "b" (input1_hi),
> -					"D"(input2_hi), "S"(input2_lo),
> -					THUNK_TARGET(hv_hypercall_pg)
> -				      : "cc");
> -	}
> -#endif
> +	u32 input1_hi = upper_32_bits(input1);
> +	u32 input1_lo = lower_32_bits(input1);
> +	u32 input2_hi = upper_32_bits(input2);
> +	u32 input2_lo = lower_32_bits(input2);
> +	u64 hv_status;
> +
> +	__asm__ __volatile__ (CALL_NOSPEC
> +			      : "=A"(hv_status),
> +			      "+c"(input1_lo), ASM_CALL_CONSTRAINT
> +			      :	"A" (control), "b" (input1_hi),
> +			      "D"(input2_hi), "S"(input2_lo),
> +			      THUNK_TARGET(hv_hypercall_pg)
> +			      : "cc");
>  	return hv_status;
> +#endif
>  }
> 
>  static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,10 +37,6 @@
>  bool hv_nested;
>  struct ms_hyperv_info ms_hyperv;
> 
> -/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
> -bool hyperv_paravisor_present __ro_after_init;
> -EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
> -
>  #if IS_ENABLED(CONFIG_HYPERV)
>  static inline unsigned int hv_get_nested_msr(unsigned int reg)
>  {
> @@ -287,8 +283,18 @@ static void __init x86_setup_ops_for_tsc
>  	old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
>  	x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
>  }
> +
> +#ifdef CONFIG_X86_64
> +DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
> +#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
> +#endif
>  #endif /* CONFIG_HYPERV */
> 
> +#ifndef hypercall_update
> +#define hypercall_update(hc) (void)hc
> +#endif
> +
>  static uint32_t  __init ms_hyperv_platform(void)
>  {
>  	u32 eax;
> @@ -483,14 +489,14 @@ static void __init ms_hyperv_init_platfo
>  			ms_hyperv.shared_gpa_boundary =
>  				BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
> 
> -		hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
> -
>  		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
>  			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
> 
> 
>  		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
>  			static_branch_enable(&isolation_type_snp);
> +			if (!ms_hyperv.paravisor_present)
> +				hypercall_update(hv_snp_hypercall);
>  		} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
>  			static_branch_enable(&isolation_type_tdx);
> 
> @@ -498,6 +504,7 @@ static void __init ms_hyperv_init_platfo
>  			ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
> 
>  			if (!ms_hyperv.paravisor_present) {
> +				hypercall_update(hv_tdx_hypercall);
>  				/*
>  				 * Mark the Hyper-V TSC page feature as disabled
>  				 * in a TDX VM without paravisor so that the
> 
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ