[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251110033232.12538-8-kernellwp@gmail.com>
Date: Mon, 10 Nov 2025 11:32:28 +0800
From: Wanpeng Li <kernellwp@...il.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Cc: Steven Rostedt <rostedt@...dmis.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Juri Lelli <juri.lelli@...hat.com>,
linux-kernel@...r.kernel.org,
kvm@...r.kernel.org,
Wanpeng Li <wanpengli@...cent.com>
Subject: [PATCH 07/10] KVM: x86: Add IPI tracking infrastructure
From: Wanpeng Li <wanpengli@...cent.com>
From: Wanpeng Li <wanpengli@...cent.com>
Introduce IPI tracking infrastructure for directed yield optimization.
Add per-vCPU IPI tracking context in kvm_vcpu_arch with
last_ipi_sender/receiver to track IPI communication pairs, pending_ipi
flag to indicate awaiting IPI response, and ipi_time_ns monotonic
timestamp for recency validation.
Add module parameters ipi_tracking_enabled (global toggle, default
true) and ipi_window_ns (recency window, default 50ms).
Add core helper functions: kvm_track_ipi_communication() to record
sender/receiver pairs, kvm_vcpu_is_ipi_receiver() to validate recent
IPI relationship, and kvm_vcpu_clear/reset_ipi_context() for lifecycle
management.
Use lockless READ_ONCE/WRITE_ONCE for minimal overhead. The short time
window prevents stale IPI information from affecting throughput
workloads.
The infrastructure is inert until integrated with interrupt delivery in
subsequent patches.
Signed-off-by: Wanpeng Li <wanpengli@...cent.com>
---
arch/x86/include/asm/kvm_host.h | 8 ++++
arch/x86/kvm/lapic.c | 65 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 6 +++
arch/x86/kvm/x86.h | 4 ++
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 5 +++
6 files changed, 89 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f..b5bdc115ff45 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1052,6 +1052,14 @@ struct kvm_vcpu_arch {
int pending_external_vector;
int highest_stale_pending_ioapic_eoi;
+ /* IPI tracking for directed yield (x86 only) */
+ struct {
+ int last_ipi_sender; /* vCPU ID of last IPI sender */
+ int last_ipi_receiver; /* vCPU ID of last IPI receiver */
+ bool pending_ipi; /* Pending IPI response */
+ u64 ipi_time_ns; /* Monotonic ns when IPI was sent */
+ } ipi_context;
+
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0ae7f913d782..98ec2b18b02c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -75,6 +75,12 @@ module_param(lapic_timer_advance, bool, 0444);
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+/* IPI tracking window and runtime toggle (runtime-adjustable) */
+static bool ipi_tracking_enabled = true;
+static unsigned long ipi_window_ns = 50 * NSEC_PER_MSEC;
+module_param(ipi_tracking_enabled, bool, 0644);
+module_param(ipi_window_ns, ulong, 0644);
+
static bool __read_mostly vector_hashing_enabled = true;
module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
@@ -1113,6 +1119,65 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}
+/*
+ * Track IPI communication for directed yield when a unique receiver exists.
+ * This only writes sender/receiver context and timestamp; ignores self-IPI.
+ */
+void kvm_track_ipi_communication(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+ if (!sender || !receiver || sender == receiver)
+ return;
+ if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+ return;
+
+ WRITE_ONCE(sender->arch.ipi_context.last_ipi_receiver, receiver->vcpu_idx);
+ WRITE_ONCE(sender->arch.ipi_context.pending_ipi, true);
+ WRITE_ONCE(sender->arch.ipi_context.ipi_time_ns, ktime_get_mono_fast_ns());
+
+ WRITE_ONCE(receiver->arch.ipi_context.last_ipi_sender, sender->vcpu_idx);
+}
+
+/*
+ * Check if 'receiver' is the recent IPI target of 'sender'.
+ *
+ * Rationale:
+ * - Use a short window to avoid stale IPI inflating boost priority
+ * on throughput-sensitive workloads.
+ */
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+ u64 then, now;
+
+ if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+ return false;
+
+ then = READ_ONCE(sender->arch.ipi_context.ipi_time_ns);
+ now = ktime_get_mono_fast_ns();
+ if (READ_ONCE(sender->arch.ipi_context.pending_ipi) &&
+ READ_ONCE(sender->arch.ipi_context.last_ipi_receiver) ==
+ receiver->vcpu_idx &&
+ now - then <= ipi_window_ns)
+ return true;
+
+ return false;
+}
+
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu)
+{
+ WRITE_ONCE(vcpu->arch.ipi_context.pending_ipi, false);
+ WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_sender, -1);
+ WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_receiver, -1);
+}
+
+/*
+ * Reset helper: clear ipi_context and zero ipi_time for hard reset paths.
+ */
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_clear_ipi_context(vcpu);
+ WRITE_ONCE(vcpu->arch.ipi_context.ipi_time_ns, 0);
+}
+
/* Return true if the interrupt can be handled by using *bitmap as index mask
* for valid destinations in *dst array.
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b4b5d2d09634..649e016c131f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12708,6 +12708,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto free_guest_fpu;
kvm_xen_init_vcpu(vcpu);
+ /* Initialize IPI tracking */
+ kvm_vcpu_reset_ipi_context(vcpu);
vcpu_load(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
@@ -12781,6 +12783,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
+ /* Clear IPI tracking context */
+ kvm_vcpu_reset_ipi_context(vcpu);
kvfree(vcpu->arch.cpuid_entries);
}
@@ -12846,6 +12850,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_leave_nested(vcpu);
kvm_lapic_reset(vcpu, init_event);
+ /* Clear IPI tracking context on reset */
+ kvm_vcpu_clear_ipi_context(vcpu);
WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f3dc77f006f9..86a10c653eac 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -451,6 +451,10 @@ fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu);
fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
+void kvm_track_ipi_communication(struct kvm_vcpu *sender,
+ struct kvm_vcpu *receiver);
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu);
extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5bd76cf394fa..5ae8327fdf21 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1532,6 +1532,7 @@ static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
}
#endif
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cde1eddbaa91..495e769c7ddf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3963,6 +3963,11 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
return false;
}
+bool __weak kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+ return false;
+}
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
int nr_vcpus, start, i, idx, yielded;
--
2.43.0
Powered by blists - more mailing lists