[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251219035334.39790-7-kernellwp@gmail.com>
Date: Fri, 19 Dec 2025 11:53:30 +0800
From: Wanpeng Li <kernellwp@...il.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Cc: K Prateek Nayak <kprateek.nayak@....com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Steven Rostedt <rostedt@...dmis.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Juri Lelli <juri.lelli@...hat.com>,
linux-kernel@...r.kernel.org,
kvm@...r.kernel.org,
Wanpeng Li <wanpengli@...cent.com>
Subject: [PATCH v2 6/9] KVM: x86: Add IPI tracking infrastructure
From: Wanpeng Li <wanpengli@...cent.com>
Add foundational infrastructure for tracking IPI sender/receiver
relationships to improve directed yield candidate selection.
Introduce per-vCPU ipi_context structure containing:
- last_ipi_receiver: vCPU index that received the last IPI from this vCPU
- ipi_time_ns: timestamp of the last IPI send
- pending_ipi: flag indicating an unacknowledged IPI
- last_ipi_sender: vCPU index that sent an IPI to this vCPU
- last_ipi_recv_time_ns: timestamp when IPI was received
Add module parameters for runtime control:
- ipi_tracking_enabled (default: true): master switch for IPI tracking
- ipi_window_ns (default: 50ms): recency window for IPI validity
Implement helper functions:
- kvm_ipi_tracking_enabled(): check if tracking is active
- kvm_vcpu_is_ipi_receiver(): determine if a vCPU is a recent IPI target
The infrastructure is inert until integrated with interrupt delivery
in subsequent patches.
v1 -> v2:
- Improve documentation for module parameters explaining the 50ms
window rationale
- Add kvm_vcpu_is_ipi_receiver() declaration to x86.h header
- Add weak function annotation comment in kvm_host.h
Signed-off-by: Wanpeng Li <wanpengli@...cent.com>
---
arch/x86/include/asm/kvm_host.h | 12 ++++++
arch/x86/kvm/lapic.c | 76 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 3 ++
arch/x86/kvm/x86.h | 8 ++++
include/linux/kvm_host.h | 3 ++
virt/kvm/kvm_main.c | 6 +++
6 files changed, 108 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a3bfa293e8b..2464c310f0a2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1052,6 +1052,18 @@ struct kvm_vcpu_arch {
int pending_external_vector;
int highest_stale_pending_ioapic_eoi;
+ /*
+ * IPI tracking for directed yield optimization.
+ * Records sender/receiver relationships when IPIs are delivered
+ * to enable IPI-aware vCPU scheduling decisions.
+ */
+ struct {
+ int last_ipi_sender; /* vcpu_idx of last IPI sender, -1 when cleared */
+ int last_ipi_receiver; /* vcpu_idx of last IPI receiver, -1 when cleared */
+ bool pending_ipi; /* Set on IPI send; cleared by kvm_vcpu_clear_ipi_context() */
+ u64 ipi_time_ns; /* ktime_get_mono_fast_ns() at last IPI send, 0 after reset */
+ } ipi_context;
+
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1597dd0b0cc6..23f247a3b127 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -75,6 +75,19 @@ module_param(lapic_timer_advance, bool, 0444);
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+/*
+ * IPI tracking knobs for directed yield. Both are module parameters with
+ * mode 0644, so their values can be changed while vCPUs are running.
+ * - ipi_tracking_enabled: global toggle (default on)
+ * - ipi_window_ns: how long a recorded IPI stays valid (default 50ms)
+ * The window should be long enough to capture IPI response patterns, yet
+ * short enough that stale data does not skew throughput-sensitive workloads.
+ */
+static bool ipi_tracking_enabled = true;
+static unsigned long ipi_window_ns = 50 * NSEC_PER_MSEC;
+module_param(ipi_tracking_enabled, bool, 0644);
+module_param(ipi_window_ns, ulong, 0644);
+
static bool __read_mostly vector_hashing_enabled = true;
module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
@@ -1113,6 +1126,69 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}
+/*
+ * Record that 'sender' sent an IPI to 'receiver': the sender gets the target
+ * index, a pending flag and a send timestamp; the receiver gets the sender
+ * index. No-op for NULL vCPUs, self-IPIs, or when tracking is disabled.
+ */
+void kvm_track_ipi_communication(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+ if (!sender || !receiver || sender == receiver)
+ return;
+ if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+ return;
+
+ WRITE_ONCE(sender->arch.ipi_context.last_ipi_receiver, receiver->vcpu_idx);
+ WRITE_ONCE(sender->arch.ipi_context.pending_ipi, true);
+ WRITE_ONCE(sender->arch.ipi_context.ipi_time_ns, ktime_get_mono_fast_ns());
+
+ WRITE_ONCE(receiver->arch.ipi_context.last_ipi_sender, sender->vcpu_idx);
+}
+
+/*
+ * Return true if 'sender' has a pending IPI whose last recorded target is
+ * 'receiver' and whose send timestamp is at most ipi_window_ns old; return
+ * false otherwise, including when IPI tracking is disabled. The window keeps
+ * stale IPIs from inflating boost priority on throughput-sensitive workloads.
+ * ipi_window_ns is writable at runtime (0644), hence the READ_ONCE() on it.
+ */
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+ u64 then, now;
+
+ if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+ return false;
+
+ then = READ_ONCE(sender->arch.ipi_context.ipi_time_ns);
+ now = ktime_get_mono_fast_ns();
+ if (READ_ONCE(sender->arch.ipi_context.pending_ipi) &&
+ READ_ONCE(sender->arch.ipi_context.last_ipi_receiver) ==
+ receiver->vcpu_idx &&
+ now - then <= READ_ONCE(ipi_window_ns))
+ return true;
+
+ return false;
+}
+
+/*
+ * Clear pending flag and peer indices (-1), e.g. on reset; keeps ipi_time_ns.
+ */
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu)
+{
+ WRITE_ONCE(vcpu->arch.ipi_context.pending_ipi, false);
+ WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_sender, -1);
+ WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_receiver, -1);
+}
+
+/*
+ * Full reset: clear the IPI context and zero ipi_time_ns (vCPU create/destroy).
+ */
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_clear_ipi_context(vcpu);
+ WRITE_ONCE(vcpu->arch.ipi_context.ipi_time_ns, 0);
+}
+
/* Return true if the interrupt can be handled by using *bitmap as index mask
* for valid destinations in *dst array.
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c6d899d53dd..d4c401ef04ca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12728,6 +12728,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto free_guest_fpu;
kvm_xen_init_vcpu(vcpu);
+ kvm_vcpu_reset_ipi_context(vcpu);
vcpu_load(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
@@ -12795,6 +12796,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
+ kvm_vcpu_reset_ipi_context(vcpu);
kvfree(vcpu->arch.cpuid_entries);
}
@@ -12871,6 +12873,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_leave_nested(vcpu);
kvm_lapic_reset(vcpu, init_event);
+ kvm_vcpu_clear_ipi_context(vcpu);
WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index fdab0ad49098..cfc24fb207e0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -466,6 +466,14 @@ fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
+/* IPI tracking helpers for directed yield; defined in lapic.c */
+void kvm_track_ipi_communication(struct kvm_vcpu *sender,
+ struct kvm_vcpu *receiver);
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender,
+ struct kvm_vcpu *receiver);
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu);
+
extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d93f75b05ae2..f42315d341b3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1535,6 +1535,9 @@ static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
+/* Weak default in kvm_main.c returns false; x86 overrides it in lapic.c */
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver);
+
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5fcd401a5897..ff771a872c6d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3964,6 +3964,12 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
return false;
}
+bool __weak kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender,
+ struct kvm_vcpu *receiver)
+{
+ return false; /* generic default: no IPI tracking, never a receiver */
+}
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
int nr_vcpus, start, i, idx, yielded;
--
2.43.0
Powered by blists - more mailing lists