[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <buq5hn27q7r5ktb33rxejp7i54s22zqu3vw44bie6vzcouzzdc@btjgkdpoeclw>
Date: Thu, 27 Feb 2025 15:15:23 +0200
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
Paolo Bonzini <pbonzini@...hat.com>, Juergen Gross <jgross@...e.com>,
"K. Y. Srinivasan" <kys@...rosoft.com>, Haiyang Zhang <haiyangz@...rosoft.com>,
Wei Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>,
Ajay Kaher <ajay.kaher@...adcom.com>, Jan Kiszka <jan.kiszka@...mens.com>,
Andy Lutomirski <luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Daniel Lezcano <daniel.lezcano@...aro.org>, John Stultz <jstultz@...gle.com>, linux-kernel@...r.kernel.org,
linux-coco@...ts.linux.dev, kvm@...r.kernel.org, virtualization@...ts.linux.dev,
linux-hyperv@...r.kernel.org, xen-devel@...ts.xenproject.org,
Tom Lendacky <thomas.lendacky@....com>, Nikunj A Dadhania <nikunj@....com>
Subject: Re: [PATCH v2 06/38] x86/tdx: Override PV calibration routines with
CPUID-based calibration
On Wed, Feb 26, 2025 at 06:18:22PM -0800, Sean Christopherson wrote:
> When running as a TDX guest, explicitly override the TSC frequency
> calibration routine with CPUID-based calibration instead of potentially
> relying on a hypervisor-controlled PV routine. For TDX guests, CPUID.0x15
> is always emulated by the TDX-Module, i.e. the information from CPUID is
> more trustworthy than the information provided by the hypervisor.
>
> To maintain backwards compatibility with TDX guest kernels that use native
> calibration, and because it's the least awful option, retain
> native_calibrate_tsc()'s stuffing of the local APIC bus period using the
> core crystal frequency. While it's entirely possible for the hypervisor
> to emulate the APIC timer at a different frequency than the core crystal
> frequency, the commonly accepted interpretation of Intel's SDM is that APIC
> timer runs at the core crystal frequency when that latter is enumerated via
> CPUID:
>
> The APIC timer frequency will be the processor’s bus clock or core
> crystal clock frequency (when TSC/core crystal clock ratio is enumerated
> in CPUID leaf 0x15).
>
> If the hypervisor is malicious and deliberately runs the APIC timer at the
> wrong frequency, nothing would stop the hypervisor from modifying the
> frequency at any time, i.e. attempting to manually calibrate the frequency
> out of paranoia would be futile.
>
> Deliberately leave the CPU frequency calibration routine as is, since the
> TDX-Module doesn't provide any guarantees with respect to CPUID.0x16.
>
> Opportunistically add a comment explaining that CoCo TSC initialization
> needs to come after hypervisor specific initialization.
>
> Cc: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/coco/tdx/tdx.c | 30 +++++++++++++++++++++++++++---
> arch/x86/include/asm/tdx.h | 2 ++
> arch/x86/kernel/tsc.c | 8 ++++++++
> 3 files changed, 37 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 32809a06dab4..42cdaa98dc5e 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -8,6 +8,7 @@
> #include <linux/export.h>
> #include <linux/io.h>
> #include <linux/kexec.h>
> +#include <asm/apic.h>
> #include <asm/coco.h>
> #include <asm/tdx.h>
> #include <asm/vmx.h>
> @@ -1063,9 +1064,6 @@ void __init tdx_early_init(void)
>
> setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
>
> - /* TSC is the only reliable clock in TDX guest */
> - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -
> cc_vendor = CC_VENDOR_INTEL;
>
> /* Configure the TD */
> @@ -1122,3 +1120,29 @@ void __init tdx_early_init(void)
>
> tdx_announce();
> }
> +
> +static unsigned long tdx_get_tsc_khz(void)
> +{
> + struct cpuid_tsc_info info;
> +
> + if (WARN_ON_ONCE(cpuid_get_tsc_freq(&info)))
> + return 0;
> +
> + lapic_timer_period = info.crystal_khz * 1000 / HZ;
> +
> + return info.tsc_khz;
> +}
> +
> +void __init tdx_tsc_init(void)
> +{
> + /* TSC is the only reliable clock in TDX guest */
> + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +
> + /*
> + * Override the PV calibration routines (if set) with more trustworthy
> + * CPUID-based calibration. The TDX module emulates CPUID, whereas any
> + * PV information is provided by the hypervisor.
> + */
> + tsc_register_calibration_routines(tdx_get_tsc_khz, NULL);
> +}
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index b4b16dafd55e..621fbdd101e2 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -53,6 +53,7 @@ struct ve_info {
> #ifdef CONFIG_INTEL_TDX_GUEST
>
> void __init tdx_early_init(void);
> +void __init tdx_tsc_init(void);
>
> void tdx_get_ve_info(struct ve_info *ve);
>
> @@ -72,6 +73,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
> #else
>
> static inline void tdx_early_init(void) { };
> +static inline void tdx_tsc_init(void) { }
> static inline void tdx_safe_halt(void) { };
>
> static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 6a011cd1ff94..472d6c71d3a5 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -32,6 +32,7 @@
> #include <asm/topology.h>
> #include <asm/uv/uv.h>
> #include <asm/sev.h>
> +#include <asm/tdx.h>
>
> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
> EXPORT_SYMBOL(cpu_khz);
> @@ -1563,8 +1564,15 @@ void __init tsc_early_init(void)
> if (is_early_uv_system())
> return;
>
> + /*
> + * Do CoCo specific "secure" TSC initialization *after* hypervisor
> + * platform initialization so that the secure variant can override the
> + * hypervisor's PV calibration routine with a more trusted method.
> + */
> if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
> snp_secure_tsc_init();
> + else if (boot_cpu_has(X86_FEATURE_TDX_GUEST))
> + tdx_tsc_init();
Maybe a x86_platform.guest callback for this?
--
Kiryl Shutsemau / Kirill A. Shutemov
Powered by blists - more mailing lists