lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <336f5623-b380-49e4-8dbe-ffa98f5aee19@gmail.com>
Date: Fri, 24 Jan 2025 09:29:20 +0000
From: "Durrant, Paul" <xadimgnik@...il.com>
To: Sean Christopherson <seanjc@...gle.com>,
 David Woodhouse <dwmw2@...radead.org>
Cc: Fred Griffoul <fgriffo@...zon.co.uk>, kvm@...r.kernel.org,
 griffoul@...il.com, vkuznets@...hat.com, Paolo Bonzini
 <pbonzini@...hat.com>, Thomas Gleixner <tglx@...utronix.de>,
 Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
 Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
 "H. Peter Anvin" <hpa@...or.com>, Paul Durrant <paul@....org>,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH] KVM: x86: Update Xen TSC leaves during CPUID emulation

On 24/01/2025 01:18, Sean Christopherson wrote:
> On Thu, Jan 23, 2025, David Woodhouse wrote:
>> On Thu, 2025-01-23 at 19:02 +0000, Fred Griffoul wrote:
>>
>>> +static inline void kvm_xen_may_update_tsc_info(struct kvm_vcpu *vcpu,
>>> +					       u32 function, u32 index,
>>> +					       u32 *eax, u32 *ecx, u32 *edx)
>>
>> Should this be called kvm_xen_maybe_update_tsc_info() ?
>>
>> Is it worth adding if (static_branch_unlikely(&kvm_xen_enabled.key))?
> 
> Or add a helper?  Especially if we end up processing KVM_REQ_CLOCK_UPDATE.
> 
> static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
> {
> 	return static_branch_unlikely(&kvm_xen_enabled.key) &&
> 	       vcpu->arch.xen.cpuid.base &&
> 	       function < vcpu->arch.xen.cpuid.limit;
> 	       function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
> }
> 
>>
>>> +{
>>> +	u32 base = vcpu->arch.xen.cpuid.base;
>>> +
>>> +	if (base && (function == (base | XEN_CPUID_LEAF(3)))) {
> 
> Pretty sure cpuid.limit needs to be checked, e.g. to avoid a false positive in
> the unlikely scenario that userspace advertised a lower limit but still filled
> the CPUID entry.
> 
>>> +		if (index == 1) {
>>> +			*ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
>>> +			*edx = vcpu->arch.hv_clock.tsc_shift;
>>
>> Are these fields in vcpu->arch.hv_clock definitely going to be set?
> 
> Set, yes.  Up-to-date, no.  If there is a pending KVM_REQ_CLOCK_UPDATE, e.g. due
> to frequency change, KVM could emulate CPUID before processing the request if
> the CPUID VM-Exit occurred before the request was made.
> 
>> If so, can we have a comment to that effect? And perhaps a warning to
>> assert the truth of that claim?
>>
>> Before this patch, if the hv_clock isn't yet set then the guest would
>> see the original content of the leaves as set by userspace?
> 
> In theory, yes, but in practice that can't happen because KVM always pends a
> KVM_REQ_CLOCK_UPDATE before entering the guest (it's stupidly hard to see).
> 
> On the first kvm_arch_vcpu_load(), vcpu->cpu will be -1, which results in
> KVM_REQ_GLOBAL_CLOCK_UPDATE being pending.
> 
>    	if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
> 		...
> 
> 		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
> 			kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
> 
> 	}
> 
> That in turn triggers a KVM_REQ_CLOCK_UPDATE.
> 
> 		if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
> 			kvm_gen_kvmclock_update(vcpu);
> 
>    static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
>    {
> 	struct kvm *kvm = v->kvm;
> 
> 	kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
> 	schedule_delayed_work(&kvm->arch.kvmclock_update_work,
> 					KVMCLOCK_UPDATE_DELAY);
>    }
> 
> And in the extremely unlikely failure path, which I assume handles the case where
> TSC calibration hasn't completed, KVM requests another KVM_REQ_CLOCK_UPDATE and
> aborts VM-Enter.  So AFAICT, it's impossible to trigger CPUID emulation without
> first stuffing Xen CPUID.
> 
> 	/* Keep irq disabled to prevent changes to the clock */
> 	local_irq_save(flags);
> 	tgt_tsc_khz = get_cpu_tsc_khz();
> 	if (unlikely(tgt_tsc_khz == 0)) {
> 		local_irq_restore(flags);
> 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
> 		return 1;
> 	}
> 
>> Now it gets zeroes if that happens?
> 
> Somewhat of a tangent, if userspace is providing non-zero values, commit f422f853af03
> ("KVM: x86/xen: update Xen CPUID Leaf 4 (tsc info) sub-leaves, if present") would
> have broken userspace.  QEMU doesn't appear to stuff non-zero values and no one
> has complained, so I think we escaped this time.
> 
> Jumping back to the code, if we add kvm_xen_is_tsc_leaf(), I would be a-ok with
> handling the CPUID manipulations in kvm_cpuid().  I'd probably even prefer it,
> because overall I think bleeding a few Xen details into common code is worth
> making the flow easier to follow.
> 
> Putting it all together, something like this?  Compile tested only.
> 
> ---
>   arch/x86/kvm/cpuid.c | 16 ++++++++++++++++
>   arch/x86/kvm/x86.c   |  3 +--
>   arch/x86/kvm/x86.h   |  1 +
>   arch/x86/kvm/xen.c   | 23 -----------------------
>   arch/x86/kvm/xen.h   | 13 +++++++++++--
>   5 files changed, 29 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index edef30359c19..689882326618 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -2005,6 +2005,22 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>   		} else if (function == 0x80000007) {
>   			if (kvm_hv_invtsc_suppressed(vcpu))
>   				*edx &= ~feature_bit(CONSTANT_TSC);
> +		} else if (IS_ENABLED(CONFIG_KVM_XEN) &&
> +			   kvm_xen_is_tsc_leaf(vcpu, function)) {
> +			/*
> +			 * Update guest TSC frequency information is necessary.
> +			 * Ignore failures, there is no sane value that can be
> +			 * provided if KVM can't get the TSC frequency.
> +			 */
> +			if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
> +				kvm_guest_time_update(vcpu);
> +
> +			if (index == 1) {
> +				*ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
> +				*edx = vcpu->arch.hv_clock.tsc_shift;
> +			} else if (index == 2) {
> +				*eax = vcpu->arch.hw_tsc_khz;
> +			}
>   		}
>   	} else {
>   		*eax = *ebx = *ecx = *edx = 0;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f61d71783d07..817a7e522935 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3173,7 +3173,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
>   	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
>   }
>   
> -static int kvm_guest_time_update(struct kvm_vcpu *v)
> +int kvm_guest_time_update(struct kvm_vcpu *v)
>   {
>   	unsigned long flags, tgt_tsc_khz;
>   	unsigned seq;
> @@ -3256,7 +3256,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   				   &vcpu->hv_clock.tsc_shift,
>   				   &vcpu->hv_clock.tsc_to_system_mul);
>   		vcpu->hw_tsc_khz = tgt_tsc_khz;
> -		kvm_xen_update_tsc_info(v);
>   	}
>   
>   	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 7a87c5fc57f1..5fdf32ba9406 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -362,6 +362,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
>   u64 get_kvmclock_ns(struct kvm *kvm);
>   uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
>   bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
> +int kvm_guest_time_update(struct kvm_vcpu *v);
>   
>   int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
>   	gva_t addr, void *val, unsigned int bytes,
> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index a909b817b9c0..ed5c2f088361 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -2247,29 +2247,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
>   	del_timer_sync(&vcpu->arch.xen.poll_timer);
>   }
>   
> -void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
> -{
> -	struct kvm_cpuid_entry2 *entry;
> -	u32 function;
> -
> -	if (!vcpu->arch.xen.cpuid.base)
> -		return;
> -
> -	function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
> -	if (function > vcpu->arch.xen.cpuid.limit)
> -		return;
> -
> -	entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
> -	if (entry) {
> -		entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
> -		entry->edx = vcpu->arch.hv_clock.tsc_shift;
> -	}
> -
> -	entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
> -	if (entry)
> -		entry->eax = vcpu->arch.hw_tsc_khz;
> -}

This LGTM. My only concern is whether vcpu->arch.hv_clock will be 
updated by anything other than a KVM_REQ_CLOCK_UPDATE? I don't think so 
but the crucial thing is that the values match what is in the vcpu_info 
struct... so maybe a safer option is to pull the values directly from that.

> -
>   void kvm_xen_init_vm(struct kvm *kvm)
>   {
>   	mutex_init(&kvm->arch.xen.xen_lock);
> diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
> index f5841d9000ae..5ee7f3f1b45f 100644
> --- a/arch/x86/kvm/xen.h
> +++ b/arch/x86/kvm/xen.h
> @@ -9,6 +9,7 @@
>   #ifndef __ARCH_X86_KVM_XEN_H__
>   #define __ARCH_X86_KVM_XEN_H__
>   
> +#include <asm/xen/cpuid.h>
>   #include <asm/xen/hypervisor.h>
>   
>   #ifdef CONFIG_KVM_XEN
> @@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
>   int kvm_xen_setup_evtchn(struct kvm *kvm,
>   			 struct kvm_kernel_irq_routing_entry *e,
>   			 const struct kvm_irq_routing_entry *ue);
> -void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
>   
>   static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
>   {
> @@ -50,6 +50,14 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
>   		kvm_xen_inject_vcpu_vector(vcpu);
>   }
>   
> +static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
> +{
> +	return static_branch_unlikely(&kvm_xen_enabled.key) &&
> +	       vcpu->arch.xen.cpuid.base &&
> +	       function < vcpu->arch.xen.cpuid.limit &&
> +	       function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
> +}
> +
>   static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
>   {
>   	return static_branch_unlikely(&kvm_xen_enabled.key) &&
> @@ -157,8 +165,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
>   	return false;
>   }
>   
> -static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
> +static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
>   {
> +	return false;
>   }
>   #endif
>   
> 
> base-commit: 84be94b5b6490e29a6f386cec90f8d5c6d14f0df


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ