[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<SN6PR02MB4157ACEFEFE3E856CB93BC1AD4822@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Thu, 1 May 2025 02:36:00 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Peter Zijlstra <peterz@...radead.org>, "x86@...nel.org" <x86@...nel.org>
CC: "kys@...rosoft.com" <kys@...rosoft.com>, "haiyangz@...rosoft.com"
<haiyangz@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
"decui@...rosoft.com" <decui@...rosoft.com>, "tglx@...utronix.de"
<tglx@...utronix.de>, "mingo@...hat.com" <mingo@...hat.com>, "bp@...en8.de"
<bp@...en8.de>, "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
"hpa@...or.com" <hpa@...or.com>, "seanjc@...gle.com" <seanjc@...gle.com>,
"pbonzini@...hat.com" <pbonzini@...hat.com>, "ardb@...nel.org"
<ardb@...nel.org>, "kees@...nel.org" <kees@...nel.org>, Arnd Bergmann
<arnd@...db.de>, "gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
"jpoimboe@...nel.org" <jpoimboe@...nel.org>, "linux-hyperv@...r.kernel.org"
<linux-hyperv@...r.kernel.org>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"linux-efi@...r.kernel.org" <linux-efi@...r.kernel.org>,
"samitolvanen@...gle.com" <samitolvanen@...gle.com>, "ojeda@...nel.org"
<ojeda@...nel.org>
Subject: RE: [PATCH v2 11/13] x86,hyperv: Clean up hv_do_hypercall()
From: Peter Zijlstra <peterz@...radead.org> Sent: Wednesday, April 30, 2025 4:08 AM
>
> What used to be a simple few instructions has turned into a giant mess
> (for x86_64). Not only does it use static_branch wrong, it mixes it
> with dynamic branches for no apparent reason.
>
> Notably it uses static_branch through an out-of-line function call,
> which completely defeats the purpose, since instead of a simple
> JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> absolutely idiotic.
>
> Add to that a dynamic test of hyperv_paravisor_present, something
> which is set once and never changed.
>
> Replace all this idiocy with a single direct function call to the
> right hypercall variant.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
I've done these tests on Hyper-V VMs with this patch series. My focus
is the Hyper-V changes in Patches 11 and 12, not the other changes.
* Normal VM boot and basic smoke test
* TDX and SEV-SNP VMs boot and basic smoke test. These VMs have
a paravisor.
* Normal VM taking a panic and running the kdump kernel
* Normal VM suspending for hibernation, then resuming from
hibernation
* Verified that IBT is enabled in a normal VM. It's not offered in a TDX
VM on Hyper-V when a paravisor is used. I don't know about the case
without a paravisor.
* Building a 64-bit kernel with and without CONFIG_AMD_MEM_ENCRYPT
and CONFIG_INTEL_TDX_GUEST.
* Building a 32-bit kernel (but I did not try to run it)
TDX and SEV-SNP VMs without a paravisor were not tested, so neither the
static call update nor the new direct call path has been exercised for
TDX and SNP hypercalls. I don't have a hardware environment where I
can test without a paravisor.
Tested-by: Michael Kelley <mhklinux@...look.com>
Reviewed-by: Michael Kelley <mhklinux@...look.com>
> ---
> arch/x86/hyperv/hv_init.c | 20 +++++
> arch/x86/hyperv/ivm.c | 15 ++++
> arch/x86/include/asm/mshyperv.h | 137 +++++++++++-----------------------------
> arch/x86/kernel/cpu/mshyperv.c | 19 +++--
> 4 files changed, 89 insertions(+), 102 deletions(-)
>
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -35,7 +35,27 @@
> #include <linux/highmem.h>
>
> void *hv_hypercall_pg;
> +
> +#ifdef CONFIG_X86_64
> +u64 hv_std_hypercall(u64 control, u64 param1, u64 param2)
> +{
> + u64 hv_status;
> +
> + if (!hv_hypercall_pg)
> + return U64_MAX;
> +
> + register u64 __r8 asm("r8") = param2;
> + asm volatile (CALL_NOSPEC
> + : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> + "+c" (control), "+d" (param1), "+r" (__r8)
> + : THUNK_TARGET(hv_hypercall_pg)
> + : "cc", "memory", "r9", "r10", "r11");
> +
> + return hv_status;
> +}
> +#else
> EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> +#endif
>
> union hv_ghcb * __percpu *hv_ghcb_pg;
>
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -376,9 +376,23 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon
> return ret;
> }
>
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
> +{
> + u64 hv_status;
> +
> + register u64 __r8 asm("r8") = param2;
> + asm volatile("vmmcall"
> + : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> + "+c" (control), "+d" (param1), "+r" (__r8)
> + : : "cc", "memory", "r9", "r10", "r11");
> +
> + return hv_status;
> +}
> +
> #else
> static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
> static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
> #endif /* CONFIG_AMD_MEM_ENCRYPT */
>
> #ifdef CONFIG_INTEL_TDX_GUEST
> @@ -428,6 +442,7 @@ u64 hv_tdx_hypercall(u64 control, u64 pa
> #else
> static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
> static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
> #endif /* CONFIG_INTEL_TDX_GUEST */
>
> #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -6,6 +6,7 @@
> #include <linux/nmi.h>
> #include <linux/msi.h>
> #include <linux/io.h>
> +#include <linux/static_call.h>
> #include <asm/nospec-branch.h>
> #include <asm/paravirt.h>
> #include <hyperv/hvhdk.h>
> @@ -38,16 +39,21 @@ static inline unsigned char hv_get_nmi_r
> return 0;
> }
>
> -#if IS_ENABLED(CONFIG_HYPERV)
> -extern bool hyperv_paravisor_present;
> +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
>
> +#if IS_ENABLED(CONFIG_HYPERV)
> extern void *hv_hypercall_pg;
>
> extern union hv_ghcb * __percpu *hv_ghcb_pg;
>
> bool hv_isolation_type_snp(void);
> bool hv_isolation_type_tdx(void);
> -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +#ifdef CONFIG_X86_64
> +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +#endif
>
> /*
> * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> @@ -64,37 +70,15 @@ static inline u64 hv_do_hypercall(u64 co
> {
> u64 input_address = input ? virt_to_phys(input) : 0;
> u64 output_address = output ? virt_to_phys(output) : 0;
> - u64 hv_status;
>
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input_address, output_address);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__("mov %[output_address], %%r8\n"
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input_address)
> - : [output_address] "r" (output_address)
> - : "cc", "memory", "r8", "r9", "r10", "r11");
> - return hv_status;
> - }
> -
> - if (!hv_hypercall_pg)
> - return U64_MAX;
> -
> - __asm__ __volatile__("mov %[output_address], %%r8\n"
> - CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input_address)
> - : [output_address] "r" (output_address),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "memory", "r8", "r9", "r10", "r11");
> + return static_call_mod(hv_hypercall)(control, input_address, output_address);
> #else
> u32 input_address_hi = upper_32_bits(input_address);
> u32 input_address_lo = lower_32_bits(input_address);
> u32 output_address_hi = upper_32_bits(output_address);
> u32 output_address_lo = lower_32_bits(output_address);
> + u64 hv_status;
>
> if (!hv_hypercall_pg)
> return U64_MAX;
> @@ -107,8 +91,8 @@ static inline u64 hv_do_hypercall(u64 co
> "D"(output_address_hi), "S"(output_address_lo),
> THUNK_TARGET(hv_hypercall_pg)
> : "cc", "memory");
> -#endif /* !x86_64 */
> return hv_status;
> +#endif /* !x86_64 */
> }
>
> /* Hypercall to the L0 hypervisor */
> @@ -120,41 +104,23 @@ static inline u64 hv_do_nested_hypercall
> /* Fast hypercall with 8 bytes of input and no output */
> static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
> {
> - u64 hv_status;
> -
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input1, 0);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__(
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - :: "cc", "r8", "r9", "r10", "r11");
> - } else {
> - __asm__ __volatile__(CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "r8", "r9", "r10", "r11");
> - }
> + return static_call_mod(hv_hypercall)(control, input1, 0);
> #else
> - {
> - u32 input1_hi = upper_32_bits(input1);
> - u32 input1_lo = lower_32_bits(input1);
> -
> - __asm__ __volatile__ (CALL_NOSPEC
> - : "=A"(hv_status),
> - "+c"(input1_lo),
> - ASM_CALL_CONSTRAINT
> - : "A" (control),
> - "b" (input1_hi),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "edi", "esi");
> - }
> -#endif
> + u32 input1_hi = upper_32_bits(input1);
> + u32 input1_lo = lower_32_bits(input1);
> + u64 hv_status;
> +
> + __asm__ __volatile__ (CALL_NOSPEC
> + : "=A"(hv_status),
> + "+c"(input1_lo),
> + ASM_CALL_CONSTRAINT
> + : "A" (control),
> + "b" (input1_hi),
> + THUNK_TARGET(hv_hypercall_pg)
> + : "cc", "edi", "esi");
> return hv_status;
> +#endif
> }
>
> static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> @@ -174,45 +140,24 @@ static inline u64 hv_do_fast_nested_hype
> /* Fast hypercall with 16 bytes of input */
> static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
> {
> - u64 hv_status;
> -
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input1, input2);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__("mov %[input2], %%r8\n"
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : [input2] "r" (input2)
> - : "cc", "r8", "r9", "r10", "r11");
> - } else {
> - __asm__ __volatile__("mov %[input2], %%r8\n"
> - CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : [input2] "r" (input2),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "r8", "r9", "r10", "r11");
> - }
> + return static_call_mod(hv_hypercall)(control, input1, input2);
> #else
> - {
> - u32 input1_hi = upper_32_bits(input1);
> - u32 input1_lo = lower_32_bits(input1);
> - u32 input2_hi = upper_32_bits(input2);
> - u32 input2_lo = lower_32_bits(input2);
> -
> - __asm__ __volatile__ (CALL_NOSPEC
> - : "=A"(hv_status),
> - "+c"(input1_lo), ASM_CALL_CONSTRAINT
> - : "A" (control), "b" (input1_hi),
> - "D"(input2_hi), "S"(input2_lo),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc");
> - }
> -#endif
> + u32 input1_hi = upper_32_bits(input1);
> + u32 input1_lo = lower_32_bits(input1);
> + u32 input2_hi = upper_32_bits(input2);
> + u32 input2_lo = lower_32_bits(input2);
> + u64 hv_status;
> +
> + __asm__ __volatile__ (CALL_NOSPEC
> + : "=A"(hv_status),
> + "+c"(input1_lo), ASM_CALL_CONSTRAINT
> + : "A" (control), "b" (input1_hi),
> + "D"(input2_hi), "S"(input2_lo),
> + THUNK_TARGET(hv_hypercall_pg)
> + : "cc");
> return hv_status;
> +#endif
> }
>
> static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,10 +37,6 @@
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> -/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
> -bool hyperv_paravisor_present __ro_after_init;
> -EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
> -
> #if IS_ENABLED(CONFIG_HYPERV)
> static inline unsigned int hv_get_nested_msr(unsigned int reg)
> {
> @@ -287,8 +283,18 @@ static void __init x86_setup_ops_for_tsc
> old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
> x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
> }
> +
> +#ifdef CONFIG_X86_64
> +DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
> +#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
> +#endif
> #endif /* CONFIG_HYPERV */
>
> +#ifndef hypercall_update
> +#define hypercall_update(hc) (void)hc
> +#endif
> +
> static uint32_t __init ms_hyperv_platform(void)
> {
> u32 eax;
> @@ -483,14 +489,14 @@ static void __init ms_hyperv_init_platfo
> ms_hyperv.shared_gpa_boundary =
> BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
>
> - hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
> -
> pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
> ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
>
>
> if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
> static_branch_enable(&isolation_type_snp);
> + if (!ms_hyperv.paravisor_present)
> + hypercall_update(hv_snp_hypercall);
> } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
> static_branch_enable(&isolation_type_tdx);
>
> @@ -498,6 +504,7 @@ static void __init ms_hyperv_init_platfo
> ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
>
> if (!ms_hyperv.paravisor_present) {
> + hypercall_update(hv_tdx_hypercall);
> /*
> * Mark the Hyper-V TSC page feature as disabled
> * in a TDX VM without paravisor so that the
>
>
Powered by blists - more mailing lists