[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<SN6PR02MB4157E849025C4A6B64933150D4842@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Fri, 25 Apr 2025 15:12:34 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Roman Kisel <romank@...ux.microsoft.com>, "bp@...en8.de" <bp@...en8.de>,
"dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
"decui@...rosoft.com" <decui@...rosoft.com>, "haiyangz@...rosoft.com"
<haiyangz@...rosoft.com>, "hpa@...or.com" <hpa@...or.com>,
"kys@...rosoft.com" <kys@...rosoft.com>, "mikelley@...rosoft.com"
<mikelley@...rosoft.com>, "mingo@...hat.com" <mingo@...hat.com>,
"tglx@...utronix.de" <tglx@...utronix.de>, "tiala@...rosoft.com"
<tiala@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"x86@...nel.org" <x86@...nel.org>
CC: "apais@...rosoft.com" <apais@...rosoft.com>, "benhill@...rosoft.com"
<benhill@...rosoft.com>, "bperkins@...rosoft.com" <bperkins@...rosoft.com>,
"sunilmut@...rosoft.com" <sunilmut@...rosoft.com>
Subject: RE: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion
in hv_snp_boot_ap()
From: Roman Kisel <romank@...ux.microsoft.com> Sent: Thursday, April 24, 2025 2:58 PM
>
> To start an application processor in SNP-isolated guest, a hypercall
> is used that takes a virtual processor index. The hv_snp_boot_ap()
> function uses that START_VP hypercall but passes as VP ID to it what
> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID.
>
> As those two aren't generally interchangeable, that may lead to hung
> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse
> whereas VP IDs never are.
I agree that VP IDs (a.k.a. VP indexes) and APIC IDs don't necessary match,
and that APIC IDs might be sparse. But I'm not aware of any statement
in the TLFS about the nature of VP indexes, except that
"A virtual processor index must be less than the maximum number of
virtual processors per partition."
But that maximum is the Hyper-V implementation maximum, not the
maximum for a particular VM. So the statement does not imply
denseness unless the number of CPUs in the VM is equal to the
Hyper-V implementation max. In other parts of Linux kernel code,
we assume that VP indexes might be sparse as well.
All that said, this is just a comment about the precise accuracy of
your commit message, and doesn't affect the code.
>
> Update the parameter names to avoid confusion as to what the parameter
> is. Use the APIC ID to VP ID conversion to provide correct input to the
> hypercall.
Terminology: The TLFS calls this the "VP Index", not the "VP ID". In
other Linux code, we also call it the "VP Index". See the hv_vp_index
array, for example. The exception is the hypercall itself, which the TLFS
calls HvCallGetVpIndexFromApicId, but which our Linux code calls
HVCALL_GET_VP_ID_FROM_APIC_ID for some unknown reason.
Could you fix the terminology to be consistent? And maybe fix the
HVCALL_* string name as well. I know you are just moving the
existing VTL code, but let's take the opportunity to avoid any
terminology inconsistency.
>
> Cc: stable@...r.kernel.org
> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest")
> Signed-off-by: Roman Kisel <romank@...ux.microsoft.com>
> ---
> arch/x86/hyperv/hv_init.c | 33 ++++++++++++++++++++++++++++++++
> arch/x86/hyperv/hv_vtl.c | 34 +--------------------------------
> arch/x86/hyperv/ivm.c | 11 +++++++++--
> arch/x86/include/asm/mshyperv.h | 5 +++--
> 4 files changed, 46 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index ddeb40930bc8..23422342a091 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void)
> return hypercall_msr.enable;
> }
> EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
> +
> +int hv_apicid_to_vp_id(u32 apic_id)
> +{
> + u64 control;
> + u64 status;
> + unsigned long irq_flags;
> + struct hv_get_vp_from_apic_id_in *input;
> + u32 *output, ret;
> +
> + local_irq_save(irq_flags);
> +
> + input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> + memset(input, 0, sizeof(*input));
> + input->partition_id = HV_PARTITION_ID_SELF;
> + input->apic_ids[0] = apic_id;
> +
> + output = *this_cpu_ptr(hyperv_pcpu_output_arg);
> +
> + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
> + status = hv_do_hypercall(control, input, output);
> + ret = output[0];
> +
> + local_irq_restore(irq_flags);
> +
> + if (!hv_result_success(status)) {
> + pr_err("failed to get vp id from apic id %d, status %#llx\n",
> + apic_id, status);
> + return -EINVAL;
> + }
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_id);
> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
> index 582fe820e29c..8bc4f0121e5e 100644
> --- a/arch/x86/hyperv/hv_vtl.c
> +++ b/arch/x86/hyperv/hv_vtl.c
> @@ -205,38 +205,6 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
> return ret;
> }
>
> -static int hv_vtl_apicid_to_vp_id(u32 apic_id)
> -{
> - u64 control;
> - u64 status;
> - unsigned long irq_flags;
> - struct hv_get_vp_from_apic_id_in *input;
> - u32 *output, ret;
> -
> - local_irq_save(irq_flags);
> -
> - input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> - memset(input, 0, sizeof(*input));
> - input->partition_id = HV_PARTITION_ID_SELF;
> - input->apic_ids[0] = apic_id;
> -
> - output = *this_cpu_ptr(hyperv_pcpu_output_arg);
> -
> - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
> - status = hv_do_hypercall(control, input, output);
> - ret = output[0];
> -
> - local_irq_restore(irq_flags);
> -
> - if (!hv_result_success(status)) {
> - pr_err("failed to get vp id from apic id %d, status %#llx\n",
> - apic_id, status);
> - return -EINVAL;
> - }
> -
> - return ret;
> -}
> -
> static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
> {
> int vp_id, cpu;
> @@ -250,7 +218,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned
> long start_eip)
> return -EINVAL;
>
> pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);
> - vp_id = hv_vtl_apicid_to_vp_id(apicid);
> + vp_id = hv_apicid_to_vp_id(apicid);
>
> if (vp_id < 0) {
> pr_err("Couldn't find CPU with APIC ID %d\n", apicid);
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index c0039a90e9e0..e3c32bb0d0cf 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -288,7 +288,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
> free_page((unsigned long)vmsa);
> }
>
> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip)
> {
> struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
> __get_free_page(GFP_KERNEL | __GFP_ZERO);
> @@ -297,10 +297,17 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
> u64 ret, retry = 5;
> struct hv_enable_vp_vtl *start_vp_input;
> unsigned long flags;
> + int vp_id;
>
> if (!vmsa)
> return -ENOMEM;
>
> + vp_id = hv_apicid_to_vp_id(apic_id);
> +
> + /* The BSP or an error */
> + if (vp_id <= 0)
Returning an error on value 0 may be problematic here. Consider
the panic case where a CPU other than the BSP takes a panic and
initiates kdump. If the kdump kernel runs with more than 1 CPU, it
may try to start the CPU that was originally the BSP. To my
knowledge, SEV-SNP guests on Hyper-V don't support kdump at
the moment so this problem is currently theoretical, but let's not
leave a potential future problem by excluding 0 here.
Also, since I assert that we really don't know anything about the
VP index values, we can't exclude 0. It may or may not be the
original BSP.
Michael
> + return -EINVAL;
> +
> native_store_gdt(&gdtr);
>
> vmsa->gdtr.base = gdtr.address;
> @@ -348,7 +355,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
> start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
> memset(start_vp_input, 0, sizeof(*start_vp_input));
> start_vp_input->partition_id = -1;
> - start_vp_input->vp_index = cpu;
> + start_vp_input->vp_index = vp_id;
> start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
> *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 07aadf0e839f..ae62a34bfd1e 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct
> hv_interrupt_entry *entry);
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> bool hv_ghcb_negotiate_protocol(void);
> void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip);
> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip);
> #else
> static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
> static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
> -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; }
> +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; }
> #endif
>
> #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
> @@ -329,6 +329,7 @@ static inline void hv_set_non_nested_msr(unsigned int reg,
> u64 value) { }
> static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
> #endif /* CONFIG_HYPERV */
>
> +int hv_apicid_to_vp_id(u32 apic_id);
>
> #ifdef CONFIG_HYPERV_VTL_MODE
> void __init hv_vtl_init_platform(void);
>
> base-commit: 628cc040b3a2980df6032766e8ef0688e981ab95
> --
> 2.43.0
>
Powered by blists - more mailing lists