[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
<SN6PR02MB4157CBC183775A8C17AC590BD4C82@SN6PR02MB4157.namprd02.prod.outlook.com>
Date: Tue, 4 Mar 2025 04:18:02 +0000
From: Michael Kelley <mhklinux@...look.com>
To: Sean Christopherson <seanjc@...gle.com>, Thomas Gleixner
<tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov
<bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>, "x86@...nel.org"
<x86@...nel.org>, "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
Paolo Bonzini <pbonzini@...hat.com>, Juergen Gross <jgross@...e.com>, "K. Y.
Srinivasan" <kys@...rosoft.com>, Haiyang Zhang <haiyangz@...rosoft.com>, Wei
Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>, Ajay Kaher
<ajay.kaher@...adcom.com>, Jan Kiszka <jan.kiszka@...mens.com>, Andy
Lutomirski <luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>, Daniel
Lezcano <daniel.lezcano@...aro.org>, John Stultz <jstultz@...gle.com>
CC: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-coco@...ts.linux.dev" <linux-coco@...ts.linux.dev>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "virtualization@...ts.linux.dev"
<virtualization@...ts.linux.dev>, "linux-hyperv@...r.kernel.org"
<linux-hyperv@...r.kernel.org>, "xen-devel@...ts.xenproject.org"
<xen-devel@...ts.xenproject.org>, Tom Lendacky <thomas.lendacky@....com>,
Nikunj A Dadhania <nikunj@....com>
Subject: RE: [PATCH v2 31/38] x86/tsc: Pass KNOWN_FREQ and RELIABLE as params
to registration
From: Sean Christopherson <seanjc@...gle.com> Sent: Wednesday, February 26, 2025 6:19 PM
>
> Add a "tsc_properties" set of flags and use it to annotate whether the
> TSC operates at a known and/or reliable frequency when registering a
> paravirtual TSC calibration routine. Currently, each PV flow manually
> sets the associated feature flags, but often in haphazard fashion that
> makes it difficult for unfamiliar readers to see the properties of the
> TSC when running under a particular hypervisor.
>
> The other, bigger issue with manually setting the feature flags is that
> it decouples the flags from the calibration routine. E.g. in theory, PV
> code could mark the TSC as having a known frequency, but then have its
> PV calibration discarded in favor of a method that doesn't use that known
> frequency. Passing the TSC properties along with the calibration routine
> will allow adding sanity checks to guard against replacing a "better"
> calibration routine with a "worse" routine.
>
> As a bonus, the flags also give developers working on new PV code a heads
> up that they should at least mark the TSC as having a known frequency.
>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/coco/sev/core.c | 6 ++----
> arch/x86/coco/tdx/tdx.c | 7 ++-----
> arch/x86/include/asm/tsc.h | 8 +++++++-
> arch/x86/kernel/cpu/acrn.c | 4 ++--
> arch/x86/kernel/cpu/mshyperv.c | 10 +++++++---
> arch/x86/kernel/cpu/vmware.c | 7 ++++---
> arch/x86/kernel/jailhouse.c | 4 ++--
> arch/x86/kernel/kvmclock.c | 4 ++--
> arch/x86/kernel/tsc.c | 8 +++++++-
> arch/x86/xen/time.c | 4 ++--
> 10 files changed, 37 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
> index dab386f782ce..29dd50552715 100644
> --- a/arch/x86/coco/sev/core.c
> +++ b/arch/x86/coco/sev/core.c
> @@ -3284,12 +3284,10 @@ void __init snp_secure_tsc_init(void)
> {
> unsigned long long tsc_freq_mhz;
>
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -
> rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
> snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
>
> tsc_register_calibration_routines(securetsc_get_tsc_khz,
> - securetsc_get_tsc_khz);
> + securetsc_get_tsc_khz,
> + TSC_FREQ_KNOWN_AND_RELIABLE);
> }
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 42cdaa98dc5e..ca31560d0dd3 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -1135,14 +1135,11 @@ static unsigned long tdx_get_tsc_khz(void)
>
> void __init tdx_tsc_init(void)
> {
> - /* TSC is the only reliable clock in TDX guest */
> - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> -
> /*
> * Override the PV calibration routines (if set) with more trustworthy
> * CPUID-based calibration. The TDX module emulates CPUID, whereas any
> * PV information is provided by the hypervisor.
> */
> - tsc_register_calibration_routines(tdx_get_tsc_khz, NULL);
> + tsc_register_calibration_routines(tdx_get_tsc_khz, NULL,
> + TSC_FREQ_KNOWN_AND_RELIABLE);
> }
> diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
> index 9318c74e8d13..360f47610258 100644
> --- a/arch/x86/include/asm/tsc.h
> +++ b/arch/x86/include/asm/tsc.h
> @@ -41,8 +41,14 @@ extern int cpuid_get_cpu_freq(unsigned int *cpu_khz);
> extern void tsc_early_init(void);
> extern void tsc_init(void);
> #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT)
> +enum tsc_properties {
> + TSC_FREQUENCY_KNOWN = BIT(0),
> + TSC_RELIABLE = BIT(1),
> + TSC_FREQ_KNOWN_AND_RELIABLE = TSC_FREQUENCY_KNOWN | TSC_RELIABLE,
> +};
> extern void tsc_register_calibration_routines(unsigned long (*calibrate_tsc)(void),
> - unsigned long (*calibrate_cpu)(void));
> + unsigned long (*calibrate_cpu)(void),
> + enum tsc_properties properties);
> #endif
> extern void mark_tsc_unstable(char *reason);
> extern int unsynchronized_tsc(void);
> diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
> index 2da3de4d470e..4f2f4f7ec334 100644
> --- a/arch/x86/kernel/cpu/acrn.c
> +++ b/arch/x86/kernel/cpu/acrn.c
> @@ -29,9 +29,9 @@ static void __init acrn_init_platform(void)
> /* Install system interrupt handler for ACRN hypervisor callback */
> sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
>
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> tsc_register_calibration_routines(acrn_get_tsc_khz,
> - acrn_get_tsc_khz);
> + acrn_get_tsc_khz,
> + TSC_FREQUENCY_KNOWN);
> }
>
> static bool acrn_x2apic_available(void)
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 174f6a71c899..445ac3adfebc 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -421,8 +421,13 @@ static void __init ms_hyperv_init_platform(void)
>
> if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
> ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
> - tsc_register_calibration_routines(hv_get_tsc_khz, hv_get_tsc_khz);
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> + enum tsc_properties tsc_properties = TSC_FREQUENCY_KNOWN;
> +
> + if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)
> + tsc_properties = TSC_FREQ_KNOWN_AND_RELIABLE;
> +
> + tsc_register_calibration_routines(hv_get_tsc_khz, hv_get_tsc_khz,
> + tsc_properties);
> }
For the Hyper-V guest code,
Reviewed-by: Michael Kelley <mhklinux@...look.com>
Tested-by: Michael Kelley <mhklinux@...look.com>
>
> if (ms_hyperv.priv_high & HV_ISOLATION) {
> @@ -525,7 +530,6 @@ static void __init ms_hyperv_init_platform(void)
> * is called.
> */
> wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL,
> HV_EXPOSE_INVARIANT_TSC);
> - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> }
>
> /*
> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
> index 399cf3286a60..a3a71309214c 100644
> --- a/arch/x86/kernel/cpu/vmware.c
> +++ b/arch/x86/kernel/cpu/vmware.c
> @@ -385,10 +385,10 @@ static void __init vmware_paravirt_ops_setup(void)
> */
> static void __init vmware_set_capabilities(void)
> {
> + /* TSC is non-stop and reliable even if the frequency isn't known. */
> setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> - if (vmware_tsc_khz)
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +
> if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
> setup_force_cpu_cap(X86_FEATURE_VMCALL);
> else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
> @@ -417,7 +417,8 @@ static void __init vmware_platform_setup(void)
>
> vmware_tsc_khz = tsc_khz;
> tsc_register_calibration_routines(vmware_get_tsc_khz,
> - vmware_get_tsc_khz);
> + vmware_get_tsc_khz,
> + TSC_FREQ_KNOWN_AND_RELIABLE);
>
> #ifdef CONFIG_X86_LOCAL_APIC
> /* Skip lapic calibration since we know the bus frequency. */
> diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
> index b0a053692161..d73a4d0fb118 100644
> --- a/arch/x86/kernel/jailhouse.c
> +++ b/arch/x86/kernel/jailhouse.c
> @@ -218,7 +218,8 @@ static void __init jailhouse_init_platform(void)
>
> machine_ops.emergency_restart = jailhouse_no_restart;
>
> - tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc);
> + tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc,
> + TSC_FREQUENCY_KNOWN);
>
> while (pa_data) {
> mapping = early_memremap(pa_data, sizeof(header));
> @@ -256,7 +257,6 @@ static void __init jailhouse_init_platform(void)
> pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
>
> precalibrated_tsc_khz = setup_data.v1.tsc_khz;
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
>
> pci_probe = 0;
>
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 1dbe12ecb26e..ce676e735ced 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -199,7 +199,6 @@ void kvmclock_cpu_action(enum kvm_guest_cpu_action action)
> */
> static unsigned long kvm_get_tsc_khz(void)
> {
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> return pvclock_tsc_khz(this_cpu_pvti());
> }
>
> @@ -403,7 +402,8 @@ void __init kvmclock_init(void)
>
> kvm_sched_clock_init(stable);
>
> - tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz);
> + tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz,
> + TSC_FREQUENCY_KNOWN);
>
> x86_platform.get_wallclock = kvm_get_wallclock;
> x86_platform.set_wallclock = kvm_set_wallclock;
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 5501d76243c8..be58df4fef66 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -1301,11 +1301,17 @@ static void __init check_system_tsc_reliable(void)
> */
> #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT)
> void tsc_register_calibration_routines(unsigned long (*calibrate_tsc)(void),
> - unsigned long (*calibrate_cpu)(void))
> + unsigned long (*calibrate_cpu)(void),
> + enum tsc_properties properties)
> {
> if (WARN_ON_ONCE(!calibrate_tsc))
> return;
>
> + if (properties & TSC_FREQUENCY_KNOWN)
> + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> + if (properties & TSC_RELIABLE)
> + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> +
> x86_platform.calibrate_tsc = calibrate_tsc;
> if (calibrate_cpu)
> x86_platform.calibrate_cpu = calibrate_cpu;
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 13e5888c4501..4de06ea55397 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -40,7 +40,6 @@ static unsigned long xen_tsc_khz(void)
> struct pvclock_vcpu_time_info *info =
> &HYPERVISOR_shared_info->vcpu_info[0].time;
>
> - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> return pvclock_tsc_khz(info);
> }
>
> @@ -571,7 +570,8 @@ static void __init xen_init_time_common(void)
> */
> paravirt_set_sched_clock(xen_sched_clock, NULL, NULL);
>
> - tsc_register_calibration_routines(xen_tsc_khz, NULL);
> + tsc_register_calibration_routines(xen_tsc_khz, NULL,
> + TSC_FREQUENCY_KNOWN);
> x86_platform.get_wallclock = xen_get_wallclock;
> }
>
> --
> 2.48.1.711.g2feabab25a-goog
>
Powered by blists - more mailing lists