Message-ID: <a3989e7ff9cca77f680f9bdfbaee52b707693221.camel@infradead.org>
Date: Fri, 29 Sep 2023 12:36:11 +0100
From: David Woodhouse <dwmw2@...radead.org>
To: kvm <kvm@...r.kernel.org>
Cc: David Woodhouse <dwmw2@...radead.org>, Paul Durrant <paul@....org>,
Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
"H. Peter Anvin" <hpa@...or.com>, linux-kernel@...r.kernel.org
Subject: [PATCH v2] KVM: x86: Use fast path for Xen timer delivery
From: David Woodhouse <dwmw@...zon.co.uk>
Most of the time there's no need to kick the vCPU and deliver the timer
event through kvm_xen_inject_timer_irqs(). Use kvm_xen_set_evtchn_fast()
directly from the timer callback, and only fall back to the slow path
when it's necessary to do so.
This gives a significant improvement in timer latency testing (using
nanosleep() for various periods and then measuring the actual time
elapsed).
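(For illustration only, not part of this patch: a minimal guest-side
harness along those lines might look like the sketch below. The period
values and helper name are made up.)

#include <stdio.h>
#include <time.h>

static long long ns_between(const struct timespec *a,
			    const struct timespec *b)
{
	return (b->tv_sec - a->tv_sec) * 1000000000LL +
	       (b->tv_nsec - a->tv_nsec);
}

int main(void)
{
	/* Sleep periods from 100us to 10ms; purely illustrative. */
	static const long periods_ns[] = { 100000, 1000000, 10000000 };

	for (unsigned int i = 0;
	     i < sizeof(periods_ns) / sizeof(periods_ns[0]); i++) {
		struct timespec req = { 0, periods_ns[i] }, start, end;

		/* Request the sleep, then measure how long it really took. */
		clock_gettime(CLOCK_MONOTONIC, &start);
		nanosleep(&req, NULL);
		clock_gettime(CLOCK_MONOTONIC, &end);

		printf("requested %ldns, overshoot %lldns\n", periods_ns[i],
		       ns_between(&start, &end) - periods_ns[i]);
	}
	return 0;
}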
However, there was a reason¹ the fast path was dropped when this support
was first added. The current code holds vcpu->mutex for all operations on
the kvm->arch.timer_expires field, and the fast path introduces potential
race conditions. So... ensure the hrtimer is *cancelled* before making
changes in kvm_xen_start_timer(), and also when reading the values out
for KVM_XEN_VCPU_ATTR_TYPE_TIMER.
Add some sanity checks to ensure the truth of the claim that all the
other code paths are run with the vcpu loaded. And use hrtimer_cancel()
directly from kvm_xen_destroy_vcpu() to avoid a false positive from the
check in kvm_xen_stop_timer().
¹ https://lore.kernel.org/kvm/846caa99-2e42-4443-1070-84e49d2f11d2@redhat.com/
Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
---
• v2: Remember, and deal with, those races.
arch/x86/kvm/xen.c | 64 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 58 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index fb1110b2385a..9d0d602a2466 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -117,6 +117,8 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(vcpu != kvm_get_running_vcpu());
+
if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) {
struct kvm_xen_evtchn e;
@@ -136,18 +138,41 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
arch.xen.timer);
+ struct kvm_xen_evtchn e;
+ int rc;
+
if (atomic_read(&vcpu->arch.xen.timer_pending))
return HRTIMER_NORESTART;
- atomic_inc(&vcpu->arch.xen.timer_pending);
- kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
- kvm_vcpu_kick(vcpu);
+ e.vcpu_id = vcpu->vcpu_id;
+ e.vcpu_idx = vcpu->vcpu_idx;
+ e.port = vcpu->arch.xen.timer_virq;
+ e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm);
+ if (rc == -EWOULDBLOCK) {
+ atomic_inc(&vcpu->arch.xen.timer_pending);
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ vcpu->arch.xen.timer_expires = 0;
+ }
return HRTIMER_NORESTART;
}
static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
{
+ WARN_ON_ONCE(vcpu != kvm_get_running_vcpu());
+
+ /*
+ * Avoid races with the old timer firing. Checking timer_expires
+ * to skip the hrtimer_cancel() call can only yield false
+ * positives (a harmless extra cancel), so it is fine.
+ */
+ if (vcpu->arch.xen.timer_expires)
+ hrtimer_cancel(&vcpu->arch.xen.timer);
+
atomic_set(&vcpu->arch.xen.timer_pending, 0);
vcpu->arch.xen.timer_expires = guest_abs;
@@ -163,6 +188,8 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_
static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(vcpu != kvm_get_running_vcpu());
+
hrtimer_cancel(&vcpu->arch.xen.timer);
vcpu->arch.xen.timer_expires = 0;
atomic_set(&vcpu->arch.xen.timer_pending, 0);
@@ -1019,13 +1046,38 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
r = 0;
break;
- case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
+ case KVM_XEN_VCPU_ATTR_TYPE_TIMER: {
+ bool pending = false;
+
+ /*
+ * Ensure a consistent snapshot of state is captured, with the
+ * timer either pending or fully delivered, not still lurking
+ * in the timer_pending flag for deferred delivery.
+ */
+ if (vcpu->arch.xen.timer_expires) {
+ pending = hrtimer_cancel(&vcpu->arch.xen.timer);
+ kvm_xen_inject_timer_irqs(vcpu);
+ }
+
data->u.timer.port = vcpu->arch.xen.timer_virq;
data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;
+
+ /*
+ * The timer may be delivered immediately, while the returned
+ * state causes it to be set up and delivered again on the
+ * destination system after migration. That's fine, as the
+ * guest will not even have had a chance to run and process
+ * the interrupt by that point, so it won't even notice the
+ * duplicate IRQ.
+ */
+ if (pending)
+ hrtimer_start_expires(&vcpu->arch.xen.timer,
+ HRTIMER_MODE_ABS_HARD);
+
r = 0;
break;
-
+ }
case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
data->u.vector = vcpu->arch.xen.upcall_vector;
r = 0;
@@ -2085,7 +2137,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
{
if (kvm_xen_timer_enabled(vcpu))
- kvm_xen_stop_timer(vcpu);
+ hrtimer_cancel(&vcpu->arch.xen.timer);
kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
--
2.40.1