lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <BYAPR03MB41331F04C79E84B97ECEF455CD3BA@BYAPR03MB4133.namprd03.prod.outlook.com>
Date:   Mon, 17 Jul 2023 10:35:22 +0800
From:   Wang Jianchao <jianchwa@...look.com>
To:     seanjc@...gle.com, tglx@...utronix.de, mingo@...hat.com,
        bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org,
        hpa@...or.com, kvm@...r.kernel.org
Cc:     arkinjob@...look.com, zhi.wang.linux@...il.com,
        xiaoyao.li@...el.com, linux-kernel@...r.kernel.org
Subject: [RFC V3 5/6] KVM: X86: add lazy tscdeadline support to reduce vm-exit of msr-write

This patch adds the main host-side logic of lazy_tscdeadline.
There are 3 operations:
 - UPDATE, when the guest writes the tsc deadline msr, we need to
   update the value of the 'armed' field of kvm_lazy_tscdeadline
 - KICK, when the hv or sw timer is fired, we need to check the
   'pending' field to decide whether to re-arm timer or inject
   local timer vector. The sw timer is not in vcpu context, so a
   new kvm req is added to handle the kick in vcpu context.
 - CLEAR, this is a bit tricky. We need to clear the 'armed' field
   properly, otherwise the guest OS can hang.

The scenarios that need a CLEAR:
 - switching between periodic/oneshot and tscdeadline mode
 - mask the lapic timer
 - tscdeadline value has expired before we arm the timer

Here is the test result of netperf TCP_RR on loopback,
                        Close               Open
--------------------------------------------------------
VM-Exit
             sum         12617503            5815737
            intr      0% 37023            0% 33002
           cpuid      0% 1                0% 0
            halt     19% 2503932         47% 2780683
       msr-write     79% 10046340        51% 2966824
           pause      0% 90               0% 84
   ept-violation      0% 584              0% 336
   ept-misconfig      0% 0                0% 2
preemption-timer      0% 29518            0% 34800
-------------------------------------------------------
MSR-Write
            sum          10046455            2966864
        apic-icr     25% 2533498         93% 2781235
    tsc-deadline     74% 7512945          6% 185629

The vm-exits caused by msr writes are reduced by about 70% (tsc-deadline msr writes alone by about 97%)

Signed-off-by: Li Shujin <arkinjob@...look.com>
Signed-off-by: Wang Jianchao <jianchwa@...look.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/lapic.c            | 93 +++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/lapic.h            |  3 +-
 arch/x86/kvm/x86.c              |  3 ++
 4 files changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b036874..b217ae7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
 	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_LAZY_TSCDEADLINE			KVM_ARCH_REQ(33)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 71da41e..781516f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1720,6 +1720,54 @@ void kvm_lazy_tscdeadline_exit(struct kvm_vcpu *vcpu)
 	vcpu->arch.lazy_tscdeadline.guest = NULL;
 }
 
+static void kvm_lazy_tscdeadline_update(struct kvm_vcpu *vcpu, u64 tsc)
+{ /* UPDATE op: record 'tsc' as the currently armed deadline */
+	struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+
+	if (!(hlt->msr_val & KVM_MSR_ENABLED) ||
+	    !hlt->guest)
+	    return; /* enable bit clear in msr_val, or no guest structure set */
+
+	hlt->guest->armed = tsc; /* copy stored in the guest structure */
+	hlt->cached_armed = tsc; /* host-side copy, tested by kick/clear */
+}
+
+bool kvm_lazy_tscdeadline_kick(struct kvm_vcpu *vcpu)
+{ /* KICK op: re-arm from 'pending' (returns true) or zero 'armed' (returns false) */
+	struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+	u64 next;
+	bool ret = false;
+
+	if (!hlt->cached_armed ||
+	    !(hlt->msr_val & KVM_MSR_ENABLED) ||
+	    !hlt->guest)
+	    return ret; /* nothing armed, enable bit clear, or no guest structure */
+
+	next = hlt->guest->pending; /* candidate next deadline */
+	if (next && next > hlt->guest->armed) { /* non-zero and later than 'armed' */
+		kvm_set_lapic_tscdeadline_msr(vcpu, next); /* program 'next' as the new deadline */
+		ret = true; /* report that the timer was re-armed */
+	} else {
+		hlt->guest->armed = 0; /* no later deadline pending: mark as not armed */
+		hlt->cached_armed = 0;
+	}
+
+	return ret;
+}
+
+void kvm_lazy_tscdeadline_clear(struct kvm_vcpu *vcpu)
+{ /* CLEAR op: zero 'armed' in the guest structure and in the host cache */
+	struct kvm_host_lazy_tscdeadline *hlt = &vcpu->arch.lazy_tscdeadline;
+
+	if (!hlt->cached_armed ||
+	    !(hlt->msr_val & KVM_MSR_ENABLED) ||
+	    !hlt->guest)
+	    return; /* already clear, enable bit clear, or no guest structure */
+
+	hlt->guest->armed = 0;
+	hlt->cached_armed = 0;
+}
+
 static void update_divide_count(struct kvm_lapic *apic)
 {
 	u32 tmp1, tmp2, tdcr;
@@ -1765,8 +1813,12 @@ static void cancel_apic_timer(struct kvm_lapic *apic)
 
 static void apic_update_lvtt(struct kvm_lapic *apic)
 {
-	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
-			apic->lapic_timer.timer_mode_mask;
+	u32 reg, timer_mode;
+	bool clear;
+
+	reg = kvm_lapic_get_reg(apic, APIC_LVTT);
+	clear = !!(reg & APIC_LVT_MASKED);
+	timer_mode = reg & apic->lapic_timer.timer_mode_mask;
 
 	if (apic->lapic_timer.timer_mode != timer_mode) {
 		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
@@ -1775,10 +1827,14 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
 			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
 			apic->lapic_timer.period = 0;
 			apic->lapic_timer.tscdeadline = 0;
+			clear = true;
 		}
 		apic->lapic_timer.timer_mode = timer_mode;
 		limit_periodic_timer_frequency(apic);
 	}
+
+	if (clear)
+		kvm_lazy_tscdeadline_clear(apic->vcpu);
 }
 
 /*
@@ -1966,8 +2022,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
 		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
-	} else
+	} else {
 		apic_timer_expired(apic, false);
+		/*
+		 * If the current pending tscdeadline has already expired, we
+		 * need to clear the 'armed' field, otherwise the guest will skip
+		 * the following msr write and the clock event hangs
+		 */
+		kvm_lazy_tscdeadline_clear(vcpu);
+	}
 
 	local_irq_restore(flags);
 }
@@ -2145,6 +2208,9 @@ static bool start_hv_timer(struct kvm_lapic *apic)
 		}
 	}
 
+	if (apic_lvtt_tscdeadline(apic) && expired)
+		kvm_lazy_tscdeadline_clear(vcpu);
+
 	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
 
 	return true;
@@ -2189,8 +2255,12 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
 	WARN_ON(kvm_vcpu_is_blocking(vcpu));
-	apic_timer_expired(apic, false);
-	cancel_hv_timer(apic);
+
+	if (!apic_lvtt_tscdeadline(apic) ||
+	    !kvm_lazy_tscdeadline_kick(vcpu)) {
+		apic_timer_expired(apic, false);
+		cancel_hv_timer(apic);
+	}
 
 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
 		advance_periodic_target_expiration(apic);
@@ -2522,6 +2592,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	apic->lapic_timer.tscdeadline = data;
+	kvm_lazy_tscdeadline_update(vcpu, data);
 	start_apic_timer(apic);
 }
 
@@ -2802,15 +2873,19 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
 
 	apic_timer_expired(apic, true);
 
-	if (lapic_is_periodic(apic)) {
+	if (apic_lvtt_tscdeadline(apic)) {
+		kvm_make_request(KVM_REQ_LAZY_TSCDEADLINE, apic->vcpu);
+	} else if (lapic_is_periodic(apic)) {
 		advance_periodic_target_expiration(apic);
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		return HRTIMER_RESTART;
-	} else
-		return HRTIMER_NORESTART;
+		ret = HRTIMER_RESTART;
+	}
+
+	return ret;
 }
 
 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 51b9d5b..0387a02 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -280,5 +280,6 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 
 int kvm_lazy_tscdeadline_init(struct kvm_vcpu *vcpu);
 void kvm_lazy_tscdeadline_exit(struct kvm_vcpu *vcpu);
-
+void kvm_lazy_tscdeadline_clear(struct kvm_vcpu *vcpu);
+bool kvm_lazy_tscdeadline_kick(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7225fc9..26f0ef3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3879,6 +3879,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 
 		if (!(data & KVM_MSR_ENABLED)) {
+			kvm_lazy_tscdeadline_clear(vcpu);
 			kvm_lazy_tscdeadline_exit(vcpu);
 		} else {
 			kvm_lazy_tscdeadline_exit(vcpu);
@@ -10584,6 +10585,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
+		if (kvm_check_request(KVM_REQ_LAZY_TSCDEADLINE, vcpu))
+			kvm_lazy_tscdeadline_kick(vcpu);
 #ifdef CONFIG_KVM_SMM
 		if (kvm_check_request(KVM_REQ_SMI, vcpu))
 			process_smi(vcpu);
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ