[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<SN6PR02MB41576A943191D154521C23C8D4B82@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Mon, 21 Apr 2025 18:27:57 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Peter Zijlstra <peterz@...radead.org>, "x86@...nel.org" <x86@...nel.org>
CC: "kys@...rosoft.com" <kys@...rosoft.com>, "haiyangz@...rosoft.com"
<haiyangz@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
"decui@...rosoft.com" <decui@...rosoft.com>, "tglx@...utronix.de"
<tglx@...utronix.de>, "mingo@...hat.com" <mingo@...hat.com>, "bp@...en8.de"
<bp@...en8.de>, "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
"hpa@...or.com" <hpa@...or.com>, "jpoimboe@...nel.org" <jpoimboe@...nel.org>,
"pawan.kumar.gupta@...ux.intel.com" <pawan.kumar.gupta@...ux.intel.com>,
"seanjc@...gle.com" <seanjc@...gle.com>, "pbonzini@...hat.com"
<pbonzini@...hat.com>, "ardb@...nel.org" <ardb@...nel.org>, "kees@...nel.org"
<kees@...nel.org>, Arnd Bergmann <arnd@...db.de>,
"gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "linux-efi@...r.kernel.org"
<linux-efi@...r.kernel.org>, "samitolvanen@...gle.com"
<samitolvanen@...gle.com>, "ojeda@...nel.org" <ojeda@...nel.org>
Subject: RE: [PATCH 4/6] x86,hyperv: Clean up hv_do_hypercall()
From: Peter Zijlstra <peterz@...radead.org> Sent: Monday, April 14, 2025 4:12 AM
>
> What used to be a simple few instructions has turned into a giant mess
> (for x86_64). Not only does it use static_branch wrong, it mixes it
> with dynamic branches for no apparent reason.
>
> Notably it uses static_branch through an out-of-line function call,
> which completely defeats the purpose, since instead of a simple
> JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> absolutely idiotic.
>
> Add to that a dynamic test of hyperv_paravisor_present, something
> which is set once and never changed.
>
> Replace all this idiocy with a single direct function call to the
> right hypercall variant.
This did indeed need cleaning after all the CoCo VM and paravisor
stuff got added. Thanks for doing it.
From looking at the code changes, I believe the 32-bit hypercall paths
are unchanged, as they weren't affected the CoCo VM and paravisor
additions. Perhaps explicitly state that intent in the commit message.
I've tested this patch set against linux-next-20250411 on normal Hyper-V
guests. Basic smoke tests pass, along with taking a panic, and
suspend/resume for guest hibernation. But getting into kdump after a
panic does not work. See comments in Patch 5 for the likely reason why.
I've also tested SNP and TDX VMs with a paravisor, and basic smoke
tests pass. But I'm testing in the Azure cloud, and I don't have access to an
environment where I can test without a paravisor. So my testing doesn't
cover the SNP and TDX specific static call paths. Maybe someone at
Microsoft can test that configuration.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
> arch/x86/hyperv/hv_init.c | 21 ++++++
> arch/x86/hyperv/ivm.c | 14 ++++
> arch/x86/include/asm/mshyperv.h | 137 +++++++++++-----------------------------
> arch/x86/kernel/cpu/mshyperv.c | 18 +++--
> 4 files changed, 88 insertions(+), 102 deletions(-)
>
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -35,7 +35,28 @@
> #include <linux/highmem.h>
>
> void *hv_hypercall_pg;
> +
> +#ifdef CONFIG_X86_64
> +u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2)
Could this get a different name so we don't have the confusion of
hv_hypercall_pg vs hv_pg_hypercall? Some possibilities:
hv_std_hypercall
hv_basic_hypercall
hv_core_hypercall
hv_normal_hypercall
hv_simple_hypercall
> +{
> + u64 hv_status;
> +
> + if (!hv_hypercall_pg)
> + return U64_MAX;
> +
> + register u64 __r8 asm("r8") = param2;
> + asm volatile (CALL_NOSPEC
> + : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> + "+c" (control), "+d" (param1)
> + : "r" (__r8),
> + THUNK_TARGET(hv_hypercall_pg)
> + : "cc", "memory", "r9", "r10", "r11");
> +
> + return hv_status;
> +}
> +#else
> EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> +#endif
>
> union hv_ghcb * __percpu *hv_ghcb_pg;
>
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -376,6 +376,20 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon
> return ret;
> }
>
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
> +{
> + u64 hv_status;
> +
> + register u64 __r8 asm("r8") = param2;
> + asm volatile("vmmcall"
> + : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> + "+c" (control), "+d" (param1)
> + : "r" (__r8)
> + : "cc", "memory", "r9", "r10", "r11");
> +
> + return hv_status;
> +}
> +
> #else
> static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
> static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -6,6 +6,7 @@
> #include <linux/nmi.h>
> #include <linux/msi.h>
> #include <linux/io.h>
> +#include <linux/static_call.h>
> #include <asm/nospec-branch.h>
> #include <asm/paravirt.h>
> #include <hyperv/hvhdk.h>
> @@ -39,15 +40,20 @@ static inline unsigned char hv_get_nmi_r
> }
>
> #if IS_ENABLED(CONFIG_HYPERV)
> -extern bool hyperv_paravisor_present;
> -
> extern void *hv_hypercall_pg;
>
> extern union hv_ghcb * __percpu *hv_ghcb_pg;
>
> bool hv_isolation_type_snp(void);
> bool hv_isolation_type_tdx(void);
> -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +#ifdef CONFIG_X86_64
> +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2);
> +
> +DECLARE_STATIC_CALL(hv_hypercall, hv_pg_hypercall);
> +#endif
>
> /*
> * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> @@ -64,37 +70,15 @@ static inline u64 hv_do_hypercall(u64 co
> {
> u64 input_address = input ? virt_to_phys(input) : 0;
> u64 output_address = output ? virt_to_phys(output) : 0;
> - u64 hv_status;
>
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input_address, output_address);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__("mov %[output_address], %%r8\n"
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input_address)
> - : [output_address] "r" (output_address)
> - : "cc", "memory", "r8", "r9", "r10", "r11");
> - return hv_status;
> - }
> -
> - if (!hv_hypercall_pg)
> - return U64_MAX;
> -
> - __asm__ __volatile__("mov %[output_address], %%r8\n"
> - CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input_address)
> - : [output_address] "r" (output_address),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "memory", "r8", "r9", "r10", "r11");
> + return static_call_mod(hv_hypercall)(control, input_address, output_address);
> #else
> u32 input_address_hi = upper_32_bits(input_address);
> u32 input_address_lo = lower_32_bits(input_address);
> u32 output_address_hi = upper_32_bits(output_address);
> u32 output_address_lo = lower_32_bits(output_address);
> + u64 hv_status;
>
> if (!hv_hypercall_pg)
> return U64_MAX;
> @@ -107,8 +91,8 @@ static inline u64 hv_do_hypercall(u64 co
> "D"(output_address_hi), "S"(output_address_lo),
> THUNK_TARGET(hv_hypercall_pg)
> : "cc", "memory");
> -#endif /* !x86_64 */
> return hv_status;
> +#endif /* !x86_64 */
> }
>
> /* Hypercall to the L0 hypervisor */
> @@ -120,41 +104,23 @@ static inline u64 hv_do_nested_hypercall
> /* Fast hypercall with 8 bytes of input and no output */
> static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
> {
> - u64 hv_status;
> -
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input1, 0);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__(
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - :: "cc", "r8", "r9", "r10", "r11");
> - } else {
> - __asm__ __volatile__(CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "r8", "r9", "r10", "r11");
> - }
> + return static_call_mod(hv_hypercall)(control, input1, 0);
> #else
> - {
> - u32 input1_hi = upper_32_bits(input1);
> - u32 input1_lo = lower_32_bits(input1);
> -
> - __asm__ __volatile__ (CALL_NOSPEC
> - : "=A"(hv_status),
> - "+c"(input1_lo),
> - ASM_CALL_CONSTRAINT
> - : "A" (control),
> - "b" (input1_hi),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "edi", "esi");
> - }
> -#endif
> + u32 input1_hi = upper_32_bits(input1);
> + u32 input1_lo = lower_32_bits(input1);
> + u64 hv_status;
> +
> + __asm__ __volatile__ (CALL_NOSPEC
> + : "=A"(hv_status),
> + "+c"(input1_lo),
> + ASM_CALL_CONSTRAINT
> + : "A" (control),
> + "b" (input1_hi),
> + THUNK_TARGET(hv_hypercall_pg)
> + : "cc", "edi", "esi");
> return hv_status;
> +#endif
> }
>
> static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> @@ -174,45 +140,24 @@ static inline u64 hv_do_fast_nested_hype
> /* Fast hypercall with 16 bytes of input */
> static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
> {
> - u64 hv_status;
> -
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> - return hv_tdx_hypercall(control, input1, input2);
> -
> - if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> - __asm__ __volatile__("mov %[input2], %%r8\n"
> - "vmmcall"
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : [input2] "r" (input2)
> - : "cc", "r8", "r9", "r10", "r11");
> - } else {
> - __asm__ __volatile__("mov %[input2], %%r8\n"
> - CALL_NOSPEC
> - : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> - "+c" (control), "+d" (input1)
> - : [input2] "r" (input2),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc", "r8", "r9", "r10", "r11");
> - }
> + return static_call_mod(hv_hypercall)(control, input1, input2);
> #else
> - {
> - u32 input1_hi = upper_32_bits(input1);
> - u32 input1_lo = lower_32_bits(input1);
> - u32 input2_hi = upper_32_bits(input2);
> - u32 input2_lo = lower_32_bits(input2);
> -
> - __asm__ __volatile__ (CALL_NOSPEC
> - : "=A"(hv_status),
> - "+c"(input1_lo), ASM_CALL_CONSTRAINT
> - : "A" (control), "b" (input1_hi),
> - "D"(input2_hi), "S"(input2_lo),
> - THUNK_TARGET(hv_hypercall_pg)
> - : "cc");
> - }
> -#endif
> + u32 input1_hi = upper_32_bits(input1);
> + u32 input1_lo = lower_32_bits(input1);
> + u32 input2_hi = upper_32_bits(input2);
> + u32 input2_lo = lower_32_bits(input2);
> + u64 hv_status;
> +
> + __asm__ __volatile__ (CALL_NOSPEC
> + : "=A"(hv_status),
> + "+c"(input1_lo), ASM_CALL_CONSTRAINT
> + : "A" (control), "b" (input1_hi),
> + "D"(input2_hi), "S"(input2_lo),
> + THUNK_TARGET(hv_hypercall_pg)
> + : "cc");
> return hv_status;
> +#endif
> }
>
> static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,10 +37,6 @@
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> -/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
> -bool hyperv_paravisor_present __ro_after_init;
> -EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
> -
> #if IS_ENABLED(CONFIG_HYPERV)
> static inline unsigned int hv_get_nested_msr(unsigned int reg)
> {
> @@ -287,6 +283,11 @@ static void __init x86_setup_ops_for_tsc
> old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
> x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
> }
> +
> +#ifdef CONFIG_X86_64
> +DEFINE_STATIC_CALL(hv_hypercall, hv_pg_hypercall);
> +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
> +#endif
> #endif /* CONFIG_HYPERV */
>
> static uint32_t __init ms_hyperv_platform(void)
> @@ -483,14 +484,16 @@ static void __init ms_hyperv_init_platfo
> ms_hyperv.shared_gpa_boundary =
> BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
>
> - hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
> -
> pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
> ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
>
>
> if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
> static_branch_enable(&isolation_type_snp);
> +#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(CONFIG_HYPERV)
> + if (!ms_hyperv.paravisor_present)
> + static_call_update(hv_hypercall, hv_snp_hypercall);
> +#endif
This #ifdef (and one below for TDX) are really ugly. They could be avoided by adding
stubs for hv_snp_hypercall() and hv_tdx_hypercall(), and making the hv_hypercall static
call exist even when !CONFIG_HYPERV (and for 32-bit builds). Or is there a reason to
not do that?
> } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
> static_branch_enable(&isolation_type_tdx);
>
> @@ -498,6 +501,9 @@ static void __init ms_hyperv_init_platfo
> ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
>
> if (!ms_hyperv.paravisor_present) {
> +#if defined(CONFIG_INTEL_TDX_GUEST) && defined(CONFIG_HYPERV)
> + static_call_update(hv_hypercall, hv_tdx_hypercall);
> +#endif
> /*
> * Mark the Hyper-V TSC page feature as disabled
> * in a TDX VM without paravisor so that the
>
>
Powered by blists - more mailing lists