[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z5LqeGa_G_NZ_boC@google.com>
Date: Thu, 23 Jan 2025 17:18:48 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: David Woodhouse <dwmw2@...radead.org>
Cc: Fred Griffoul <fgriffo@...zon.co.uk>, kvm@...r.kernel.org, griffoul@...il.com,
vkuznets@...hat.com, Paolo Bonzini <pbonzini@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
"H. Peter Anvin" <hpa@...or.com>, Paul Durrant <paul@....org>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] KVM: x86: Update Xen TSC leaves during CPUID emulation
On Thu, Jan 23, 2025, David Woodhouse wrote:
> On Thu, 2025-01-23 at 19:02 +0000, Fred Griffoul wrote:
>
> > +static inline void kvm_xen_may_update_tsc_info(struct kvm_vcpu *vcpu,
> > + u32 function, u32 index,
> > + u32 *eax, u32 *ecx, u32 *edx)
>
> Should this be called kvm_xen_maybe_update_tsc_info() ?
>
> Is it worth adding if (static_branch_unlikely(&kvm_xen_enabled.key))?
Or add a helper? Especially if we end up processing KVM_REQ_CLOCK_UPDATE.
static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
vcpu->arch.xen.cpuid.base &&
function < vcpu->arch.xen.cpuid.limit;
function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
}
>
> > +{
> > + u32 base = vcpu->arch.xen.cpuid.base;
> > +
> > + if (base && (function == (base | XEN_CPUID_LEAF(3)))) {
Pretty sure cpuid.limit needs to be checked, e.g. to avoid a false positive in
the unlikely scenario that userspace advertised a lower limit but still filled
the CPUID entry.
> > + if (index == 1) {
> > + *ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
> > + *edx = vcpu->arch.hv_clock.tsc_shift;
>
> Are these fields in vcpu->arch.hv_clock definitely going to be set?
Set, yes. Up-to-date, no. If there is a pending KVM_REQ_CLOCK_UPDATE, e.g. due
to frequency change, KVM could emulate CPUID before processing the request if
the CPUID VM-Exit occurred before the request was made.
> If so, can we have a comment to that effect? And perhaps a warning to
> assert the truth of that claim?
>
> Before this patch, if the hv_clock isn't yet set then the guest would
> see the original content of the leaves as set by userspace?
In theory, yes, but in practice that can't happen because KVM always pends a
KVM_REQ_CLOCK_UPDATE before entering the guest (it's stupidly hard to see).
On the first kvm_arch_vcpu_load(), vcpu->cpu will be -1, which results in
KVM_REQ_GLOBAL_CLOCK_UPDATE being pending.
if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
...
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
}
That in turn triggers a KVM_REQ_CLOCK_UPDATE.
if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
kvm_gen_kvmclock_update(vcpu);
static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
struct kvm *kvm = v->kvm;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
schedule_delayed_work(&kvm->arch.kvmclock_update_work,
KVMCLOCK_UPDATE_DELAY);
}
And in the extremely unlikely failure path, which I assume handles the case where
TSC calibration hasn't completed, KVM requests another KVM_REQ_CLOCK_UPDATE and
aborts VM-Enter. So AFAICT, it's impossible to trigger CPUID emulation without
first stuffing Xen CPUID.
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
tgt_tsc_khz = get_cpu_tsc_khz();
if (unlikely(tgt_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
return 1;
}
> Now it gets zeroes if that happens?
Somewhat of a tangent, if userspace is providing non-zero values, commit f422f853af03
("KVM: x86/xen: update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present") would
have broken userspace. QEMU doesn't appear to stuff non-zero values and no one
has complained, so I think we escaped this time.
Jumping back to the code, if we add kvm_xen_is_tsc_leaf(), I would be a-ok with
handling the CPUID manipulations in kvm_cpuid(). I'd probably even prefer it,
because overall I think bleeding a few Xen details into common code is worth
making the flow easier to follow.
Putting it all together, something like this? Compile tested only.
---
arch/x86/kvm/cpuid.c | 16 ++++++++++++++++
arch/x86/kvm/x86.c | 3 +--
arch/x86/kvm/x86.h | 1 +
arch/x86/kvm/xen.c | 23 -----------------------
arch/x86/kvm/xen.h | 13 +++++++++++--
5 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index edef30359c19..689882326618 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -2005,6 +2005,22 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
} else if (function == 0x80000007) {
if (kvm_hv_invtsc_suppressed(vcpu))
*edx &= ~feature_bit(CONSTANT_TSC);
+ } else if (IS_ENABLED(CONFIG_KVM_XEN) &&
+ kvm_xen_is_tsc_leaf(vcpu, function)) {
+ /*
+ * Update guest TSC frequency information is necessary.
+ * Ignore failures, there is no sane value that can be
+ * provided if KVM can't get the TSC frequency.
+ */
+ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
+ kvm_guest_time_update(vcpu);
+
+ if (index == 1) {
+ *ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
+ *edx = vcpu->arch.hv_clock.tsc_shift;
+ } else if (index == 2) {
+ *eax = vcpu->arch.hw_tsc_khz;
+ }
}
} else {
*eax = *ebx = *ecx = *edx = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f61d71783d07..817a7e522935 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3173,7 +3173,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
}
-static int kvm_guest_time_update(struct kvm_vcpu *v)
+int kvm_guest_time_update(struct kvm_vcpu *v)
{
unsigned long flags, tgt_tsc_khz;
unsigned seq;
@@ -3256,7 +3256,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
- kvm_xen_update_tsc_info(v);
}
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7a87c5fc57f1..5fdf32ba9406 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -362,6 +362,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
+int kvm_guest_time_update(struct kvm_vcpu *v);
int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index a909b817b9c0..ed5c2f088361 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -2247,29 +2247,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *entry;
- u32 function;
-
- if (!vcpu->arch.xen.cpuid.base)
- return;
-
- function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
- if (function > vcpu->arch.xen.cpuid.limit)
- return;
-
- entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
- if (entry) {
- entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
- entry->edx = vcpu->arch.hv_clock.tsc_shift;
- }
-
- entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
- if (entry)
- entry->eax = vcpu->arch.hw_tsc_khz;
-}
-
void kvm_xen_init_vm(struct kvm *kvm)
{
mutex_init(&kvm->arch.xen.xen_lock);
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index f5841d9000ae..5ee7f3f1b45f 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,7 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
+#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#ifdef CONFIG_KVM_XEN
@@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
{
@@ -50,6 +50,14 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
kvm_xen_inject_vcpu_vector(vcpu);
}
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
+{
+ return static_branch_unlikely(&kvm_xen_enabled.key) &&
+ vcpu->arch.xen.cpuid.base &&
+ function < vcpu->arch.xen.cpuid.limit &&
+ function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
+}
+
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -157,8 +165,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
return false;
}
-static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
{
+ return false;
}
#endif
base-commit: 84be94b5b6490e29a6f386cec90f8d5c6d14f0df
--
Powered by blists - more mailing lists