Message-ID: <20250227021855.3257188-28-seanjc@google.com>
Date: Wed, 26 Feb 2025 18:18:43 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>, Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>, Juergen Gross <jgross@...e.com>,
"K. Y. Srinivasan" <kys@...rosoft.com>, Haiyang Zhang <haiyangz@...rosoft.com>, Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>, Ajay Kaher <ajay.kaher@...adcom.com>,
Jan Kiszka <jan.kiszka@...mens.com>, Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>, Daniel Lezcano <daniel.lezcano@...aro.org>,
John Stultz <jstultz@...gle.com>
Cc: linux-kernel@...r.kernel.org, linux-coco@...ts.linux.dev,
kvm@...r.kernel.org, virtualization@...ts.linux.dev,
linux-hyperv@...r.kernel.org, xen-devel@...ts.xenproject.org,
Tom Lendacky <thomas.lendacky@....com>, Nikunj A Dadhania <nikunj@....com>
Subject: [PATCH v2 27/38] x86/kvmclock: Enable kvmclock on APs during onlining if kvmclock isn't sched_clock

In anticipation of making x86_cpuinit.early_percpu_clock_init(), i.e.
kvm_setup_secondary_clock(), a dedicated sched_clock hook that will be
invoked if and only if kvmclock is set as sched_clock, ensure APs enable
their kvmclock during CPU online. While a redundant write to the MSR is
technically ok, skip the registration when kvmclock is sched_clock so that
it's somewhat obvious that kvmclock *needs* to be enabled during early
bringup when it's being used as sched_clock.
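
For reference, kvm_register_clock() amounts to a single MSR write that
(re)registers the per-CPU pvclock page. A rough sketch of the existing
helper in arch/x86/kernel/kvmclock.c (simplified, details may differ):

  static void kvm_register_clock(char *txt)
  {
	struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
	u64 pa;

	if (!src)
		return;

	/* Physical address of this CPU's pvclock page, with the enable bit. */
	pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
	wrmsrl(msr_kvm_system_time, pa);
	pr_debug("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
  }

i.e. a redundant call from the online path would simply rewrite the same
physical address.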

Plumb in the BSP's resume path purely for documentation purposes. Both
KVM (as-a-guest) and timekeeping/clocksource hook syscore_ops, and it's
not super obvious that using KVM's hooks would be flawed. E.g. it would
work today, because KVM's hooks happen to run after/before timekeeping's
hooks during suspend/resume, but that's sheer dumb luck as the order in
which syscore_ops are invoked depends entirely on when a subsystem is
initialized and thus registers its hooks.
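
For context on why the ordering is fragile: syscore_ops have no notion of
priority, registration simply appends to a list, and suspend callbacks run
in reverse registration order while resume callbacks run in registration
order. A rough sketch (simplified from drivers/base/syscore.c):

  void register_syscore_ops(struct syscore_ops *ops)
  {
	mutex_lock(&syscore_ops_lock);
	/*
	 * Appended in initcall order; suspend walks the list in reverse,
	 * resume walks it forward, so the relative ordering between
	 * subsystems is purely an artifact of when each one registers.
	 */
	list_add_tail(&ops->node, &syscore_ops_list);
	mutex_unlock(&syscore_ops_lock);
  }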

Opportunistically make the registration messages more precise to help
debug issues where kvmclock is enabled too late.

Opportunistically WARN in kvmclock_{suspend,resume}() to harden against
future bugs.

Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
arch/x86/include/asm/kvm_para.h | 2 ++
arch/x86/kernel/kvm.c | 24 +++++++++++-------
arch/x86/kernel/kvmclock.c | 44 +++++++++++++++++++++++++++------
3 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 8708598f5b8e..42d90bf8fde9 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -120,6 +120,8 @@ static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
#ifdef CONFIG_KVM_GUEST
enum kvm_guest_cpu_action {
KVM_GUEST_BSP_SUSPEND,
+ KVM_GUEST_BSP_RESUME,
+ KVM_GUEST_AP_ONLINE,
KVM_GUEST_AP_OFFLINE,
KVM_GUEST_SHUTDOWN,
};
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 866b061ee0d9..5f093190df17 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -461,18 +461,24 @@ static void kvm_guest_cpu_offline(enum kvm_guest_cpu_action action)
kvmclock_cpu_action(action);
}
+static int __kvm_cpu_online(unsigned int cpu, enum kvm_guest_cpu_action action)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kvmclock_cpu_action(action);
+ kvm_guest_cpu_init();
+ local_irq_restore(flags);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+
static int kvm_cpu_online(unsigned int cpu)
{
- unsigned long flags;
-
- local_irq_save(flags);
- kvm_guest_cpu_init();
- local_irq_restore(flags);
- return 0;
+ return __kvm_cpu_online(cpu, KVM_GUEST_AP_ONLINE);
}
-#ifdef CONFIG_SMP
-
static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
static bool pv_tlb_flush_supported(void)
@@ -737,7 +743,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
- kvm_cpu_online(raw_smp_processor_id());
+ __kvm_cpu_online(raw_smp_processor_id(), KVM_GUEST_BSP_RESUME);
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0ce23f862cbd..76884dfc77f4 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -52,6 +52,7 @@ static struct pvclock_vsyscall_time_info *hvclock_mem;
DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu);
+static bool kvmclock_is_sched_clock;
static bool kvmclock_suspended;
/*
@@ -128,7 +129,7 @@ static void kvm_save_sched_clock_state(void)
#ifdef CONFIG_SMP
static void kvm_setup_secondary_clock(void)
{
- kvm_register_clock("secondary cpu clock");
+ kvm_register_clock("secondary cpu, sched_clock setup");
}
#endif
@@ -140,25 +141,51 @@ static void kvm_restore_sched_clock_state(void)
static void kvmclock_suspend(struct clocksource *cs)
{
+ if (WARN_ON_ONCE(kvmclock_is_sched_clock))
+ return;
+
kvmclock_suspended = true;
kvmclock_disable();
}
static void kvmclock_resume(struct clocksource *cs)
{
+ if (WARN_ON_ONCE(kvmclock_is_sched_clock))
+ return;
+
kvmclock_suspended = false;
kvm_register_clock("primary cpu, clocksource resume");
}
void kvmclock_cpu_action(enum kvm_guest_cpu_action action)
{
- /*
- * Don't disable kvmclock on the BSP during suspend. If kvmclock is
- * being used for sched_clock, then it needs to be kept alive until the
- * last minute, and restored as quickly as possible after resume.
- */
- if (action != KVM_GUEST_BSP_SUSPEND)
+ switch (action) {
+ /*
+ * The BSP's clock is managed via clocksource suspend/resume,
+ * to ensure it's enabled/disabled when timekeeping needs it
+ * to be, e.g. before reading wallclock (which uses kvmclock).
+ */
+ case KVM_GUEST_BSP_SUSPEND:
+ case KVM_GUEST_BSP_RESUME:
+ break;
+ case KVM_GUEST_AP_ONLINE:
+ /*
+ * Secondary CPUs use dedicated sched_clock hooks to enable
+ * kvmclock early during bringup; there's nothing to be done
+ * during CPU online.
+ */
+ if (kvmclock_is_sched_clock)
+ break;
+ kvm_register_clock("secondary cpu, online");
+ break;
+ case KVM_GUEST_AP_OFFLINE:
+ case KVM_GUEST_SHUTDOWN:
kvmclock_disable();
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
}
/*
@@ -313,6 +340,7 @@ static void __init kvm_sched_clock_init(bool stable)
kvm_sched_clock_offset = kvm_clock_read();
__paravirt_set_sched_clock(kvm_sched_clock_read, stable,
kvm_save_sched_clock_state, kvm_restore_sched_clock_state);
+ kvmclock_is_sched_clock = true;
/*
* The BSP's clock is managed via dedicated sched_clock save/restore
@@ -356,7 +384,7 @@ void __init kvmclock_init(void)
msr_kvm_system_time, msr_kvm_wall_clock);
this_cpu_write(hv_clock_per_cpu, &hv_clock_boot[0]);
- kvm_register_clock("primary cpu clock");
+ kvm_register_clock("primary cpu, online");
pvclock_set_pvti_cpu0_va(hv_clock_boot);
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) {
--
2.48.1.711.g2feabab25a-goog