lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 13 Dec 2023 21:47:22 -0500
From:   "Vineeth Pillai (Google)" <vineeth@...byteword.org>
To:     Ben Segall <bsegall@...gle.com>, Borislav Petkov <bp@...en8.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        "H . Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Mel Gorman <mgorman@...e.de>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Andy Lutomirski <luto@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Sean Christopherson <seanjc@...gle.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Valentin Schneider <vschneid@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>
Cc:     "Vineeth Pillai (Google)" <vineeth@...byteword.org>,
        Suleiman Souhlal <suleiman@...gle.com>,
        Masami Hiramatsu <mhiramat@...gle.com>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, x86@...nel.org,
        Joel Fernandes <joel@...lfernandes.org>
Subject: [RFC PATCH 5/8] kvm: x86: upper bound for preemption based boost duration

Guest requests boost on preempt disable but doesn't request unboost on
preempt enable. This may cause the guest vcpu to be boosted for longer
than what it deserves. Also, there are lots of preemption disabled paths
in kernel and some could be quite long.

This patch sets a bound on the maximum time a vcpu is boosted due to
preemption disabled in guest. Default is 3000us, and could be changed
via kvm module parameter.

Co-developed-by: Joel Fernandes (Google) <joel@...lfernandes.org>
Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
Signed-off-by: Vineeth Pillai (Google) <vineeth@...byteword.org>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 49 ++++++++++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 474fe2d6d3e0..6a8326baa6a0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -994,6 +994,8 @@ struct kvm_vcpu_arch {
 	 */
 	struct {
 		enum kvm_vcpu_boost_state boost_status;
+		bool preempt_disabled;
+		ktime_t preempt_disabled_ts;
 		int boost_policy;
 		int boost_prio;
 		u64 msr_val;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2577e1083f91..8c15c6ff352e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -199,6 +199,15 @@ module_param(eager_page_split, bool, 0644);
 static bool __read_mostly mitigate_smt_rsb;
 module_param(mitigate_smt_rsb, bool, 0444);
 
+#ifdef CONFIG_PARAVIRT_SCHED_KVM
+/*
+ * Maximum time in micro seconds a guest vcpu can stay boosted due
+ * to preemption disabled.
+ */
+unsigned int pvsched_max_preempt_disabled_us = 3000;
+module_param(pvsched_max_preempt_disabled_us, uint, 0644);
+#endif
+
 /*
  * Restoring the host value for MSRs that are only consumed when running in
  * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -2149,17 +2158,47 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_PARAVIRT_SCHED_KVM
+static inline void kvm_vcpu_update_preempt_disabled(struct kvm_vcpu_arch *arch,
+		bool preempt_disabled)
+{
+	if (arch->pv_sched.preempt_disabled != preempt_disabled) {
+		arch->pv_sched.preempt_disabled = preempt_disabled;
+		if (preempt_disabled)
+			arch->pv_sched.preempt_disabled_ts = ktime_get();
+		else
+			arch->pv_sched.preempt_disabled_ts = 0;
+	}
+}
+
+static inline bool kvm_vcpu_exceeds_preempt_disabled_duration(struct kvm_vcpu_arch *arch)
+{
+	s64 max_delta = pvsched_max_preempt_disabled_us * NSEC_PER_USEC;
+
+	if (max_delta && arch->pv_sched.preempt_disabled) {
+		s64 delta;
+
+		WARN_ON_ONCE(arch->pv_sched.preempt_disabled_ts == 0);
+		delta = ktime_to_ns(ktime_sub(ktime_get(),
+					arch->pv_sched.preempt_disabled_ts));
+
+		if (delta >= max_delta)
+			return true;
+	}
+
+	return false;
+}
+
 static inline bool __vcpu_needs_boost(struct kvm_vcpu *vcpu, union guest_schedinfo schedinfo)
 {
 	bool pending_event = kvm_cpu_has_pending_timer(vcpu) || kvm_cpu_has_interrupt(vcpu);
 
 	/*
 	 * vcpu needs a boost if
-	 * - A lazy boost request active, or
-	 * - Pending latency sensitive event, or
-	 * - Preemption disabled in this vcpu.
+	 * - A lazy boost request active or a pending latency sensitive event, and
+	 * - Preemption disabled duration on this vcpu has not crossed the threshold.
 	 */
-	return (schedinfo.boost_req == VCPU_REQ_BOOST || pending_event || schedinfo.preempt_disabled);
+	return ((schedinfo.boost_req == VCPU_REQ_BOOST || pending_event) &&
+			!kvm_vcpu_exceeds_preempt_disabled_duration(&vcpu->arch));
 }
 
 static inline void kvm_vcpu_do_pv_sched(struct kvm_vcpu *vcpu)
@@ -2173,6 +2212,8 @@ static inline void kvm_vcpu_do_pv_sched(struct kvm_vcpu *vcpu)
 		&schedinfo, offsetof(struct pv_sched_data, schedinfo), sizeof(schedinfo)))
 		return;
 
+	kvm_vcpu_update_preempt_disabled(&vcpu->arch, schedinfo.preempt_disabled);
+
 	kvm_vcpu_set_sched(vcpu, __vcpu_needs_boost(vcpu, schedinfo));
 }
 #else
-- 
2.43.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ