lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250218202618.567363-3-sieberf@amazon.com>
Date: Tue, 18 Feb 2025 22:26:02 +0200
From: Fernand Sieber <sieberf@...zon.com>
To: <sieberf@...zon.com>, Ingo Molnar <mingo@...hat.com>, Peter Zijlstra
	<peterz@...radead.org>, Vincent Guittot <vincent.guittot@...aro.org>, "Paolo
 Bonzini" <pbonzini@...hat.com>, <x86@...nel.org>, <kvm@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, <nh-open-source@...zon.com>
Subject: [RFC PATCH 2/3] kvm/x86: Add support for gtime halted

The previous commit introduced the concept of guest time halted to allow
the hypervisor to track real guest CPU activity (halted cycles) with
mwait/hlt/pause pass through enabled.

This commit implements it for the x86 architecture. We track the number of
actual cycles executed by the guest by taking two reads on MSR_IA32_MPERF,
one before vcpu enter and the other after vcpu exit. These two reads happen
immediately before and after guest_timing_enter/exit_irqoff which are the
architecture independent points for gtime accounting. The difference between
the reads corresponds to the number of unhalted cycles. We get the number
of halted cycles by using the tsc difference with the unhalted cycles and
tolerate slight approximations.
---
 arch/x86/include/asm/tsc.h |  1 +
 arch/x86/kernel/tsc.c      | 13 +++++++++++++
 arch/x86/kvm/x86.c         | 26 ++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94408a784c8e..00ad09e7268e 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -37,6 +37,7 @@ extern void mark_tsc_async_resets(char *reason);
 extern unsigned long native_calibrate_cpu_early(void);
 extern unsigned long native_calibrate_tsc(void);
 extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
+extern unsigned long long cycles2ns(unsigned long long cycles);

 extern int tsc_clocksource_reliable;
 #ifdef CONFIG_X86_TSC
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 34dec0b72ea8..80bb12357148 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -144,6 +144,19 @@ static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
 	return ns;
 }

+unsigned long long cycles2ns(unsigned long long cyc)
+{
+       struct cyc2ns_data data;
+       unsigned long long ns;
+
+       cyc2ns_read_begin(&data);
+       ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
+       cyc2ns_read_end();
+
+       return ns;
+}
+EXPORT_SYMBOL(cycles2ns);
+
 static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
 {
 	unsigned long long ns_now;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02159c967d29..46975b0a63a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10688,6 +10688,19 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 	kvm_x86_call(set_apic_access_page_addr)(vcpu);
 }

+static bool needs_halted_accounting(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->kvm->arch.mwait_in_guest ||
+			vcpu->kvm->arch.hlt_in_guest ||
+			vcpu->kvm->arch.pause_in_guest) &&
+		boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+static long long get_unhalted_cycles(void)
+{
+	return __rdmsr(MSR_IA32_MPERF);
+}
+
 /*
  * Called within kvm->srcu read side.
  * Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10697,6 +10710,8 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
+	unsigned long long cycles, cycles_start = 0;
+	unsigned long long unhalted_cycles, unhalted_cycles_start = 0;
 	bool req_int_win =
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
@@ -10968,6 +10983,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(0, 7);
 	}

+	if (needs_halted_accounting(vcpu)) {
+		cycles_start = get_cycles();
+		unhalted_cycles_start = get_unhalted_cycles();
+	}
 	guest_timing_enter_irqoff();

 	for (;;) {
@@ -11060,6 +11079,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * acceptable for all known use cases.
 	 */
 	guest_timing_exit_irqoff();
+	if (needs_halted_accounting(vcpu)) {
+		cycles = get_cycles() - cycles_start;
+		unhalted_cycles = get_unhalted_cycles() -
+			unhalted_cycles_start;
+		if (likely(cycles > unhalted_cycles))
+			current->gtime_halted += cycles2ns(cycles - unhalted_cycles);
+	}

 	local_irq_enable();
 	preempt_enable();
--
2.43.0




Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ