lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251110033232.12538-9-kernellwp@gmail.com>
Date: Mon, 10 Nov 2025 11:32:29 +0800
From: Wanpeng Li <kernellwp@...il.com>
To: Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Paolo Bonzini <pbonzini@...hat.com>,
	Sean Christopherson <seanjc@...gle.com>
Cc: Steven Rostedt <rostedt@...dmis.org>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	linux-kernel@...r.kernel.org,
	kvm@...r.kernel.org,
	Wanpeng Li <wanpengli@...cent.com>
Subject: [PATCH 08/10] KVM: x86/lapic: Integrate IPI tracking with interrupt delivery

From: Wanpeng Li <wanpengli@...cent.com>

Integrate IPI tracking with LAPIC interrupt delivery and EOI handling.

Hook into kvm_irq_delivery_to_apic() after destination resolution to
record sender/receiver pairs when the interrupt is LAPIC-originated,
APIC_DM_FIXED mode, with exactly one destination vCPU. Count the
vCPUs that actually receive the interrupt so the unique-destination
case is detected efficiently.

Add kvm_clear_ipi_on_eoi() called from both EOI paths to ensure
complete IPI context cleanup:

1. apic_set_eoi(): Software-emulated EOI path (traditional/non-APICv)
2. kvm_apic_set_eoi_accelerated(): Hardware-accelerated EOI path
   (APICv/AVIC)

Without dual-path cleanup, APICv/AVIC-enabled guests would retain
stale IPI state, causing directed yield to rely on obsolete sender/
receiver information and potentially boosting the wrong vCPU. Both
paths must call kvm_clear_ipi_on_eoi() to maintain consistency across
different virtual interrupt delivery modes.

The cleanup implements two-stage logic to avoid premature clearing:
unconditionally clear the receiver's IPI context, and conditionally
clear the sender's pending flag only when the sender exists,
last_ipi_receiver matches, and the IPI is recent. This prevents
unrelated EOIs from disrupting valid IPI tracking state.

Use lockless accessors for minimal overhead. The tracking only
activates for unicast fixed IPIs where directed yield provides value.

Signed-off-by: Wanpeng Li <wanpengli@...cent.com>
---
 arch/x86/kvm/lapic.c | 107 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 103 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 98ec2b18b02c..d38e64691b78 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1178,6 +1178,47 @@ void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu)
 	WRITE_ONCE(vcpu->arch.ipi_context.ipi_time_ns, 0);
 }
 
+/*
+ * Clear IPI context on EOI at receiver side; clear sender's pending
+ * only when matches and is fresh.
+ *
+ * This function implements precise cleanup to avoid stale IPI boosts:
+ * 1) Always clear the receiver's IPI context (unconditional cleanup)
+ * 2) Conditionally clear the sender's pending flag only when:
+ *    - The sender vCPU still exists and is valid
+ *    - The sender's last_ipi_receiver matches this receiver
+ *    - The IPI was sent recently (within ipi_window_ns)
+ */
+static void kvm_clear_ipi_on_eoi(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *receiver;
+	int sender_idx;
+	u64 then, now;
+
+	if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+		return;
+
+	receiver = apic->vcpu;
+	sender_idx = READ_ONCE(receiver->arch.ipi_context.last_ipi_sender);
+
+	/* Step 1: Always clear receiver's IPI context */
+	kvm_vcpu_clear_ipi_context(receiver);
+
+	/* Step 2: Conditionally clear sender's pending flag */
+	if (sender_idx >= 0) {
+		struct kvm_vcpu *sender = kvm_get_vcpu(receiver->kvm, sender_idx);
+
+		if (sender &&
+		    READ_ONCE(sender->arch.ipi_context.last_ipi_receiver) ==
+		    receiver->vcpu_idx) {
+			then = READ_ONCE(sender->arch.ipi_context.ipi_time_ns);
+			now = ktime_get_mono_fast_ns();
+			if (now - then <= ipi_window_ns)
+				WRITE_ONCE(sender->arch.ipi_context.pending_ipi, false);
+		}
+	}
+}
+
 /* Return true if the interrupt can be handled by using *bitmap as index mask
  * for valid destinations in *dst array.
  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
@@ -1259,6 +1300,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_lapic **dst = NULL;
 	int i;
 	bool ret;
+	/* Count actual delivered targets to identify a unique recipient. */
+	int targets = 0;
+	int delivered = 0;
+	struct kvm_vcpu *unique = NULL;
 
 	*r = -1;
 
@@ -1280,8 +1325,26 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		for_each_set_bit(i, &bitmap, 16) {
 			if (!dst[i])
 				continue;
-			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
+			delivered = kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
+			*r += delivered;
+			/* Fast path may still fan out; count delivered targets. */
+			if (delivered > 0) {
+				targets++;
+				unique = dst[i]->vcpu;
+			}
 		}
+
+		/*
+		 * Record unique recipient for IPI-aware boost:
+		 * only for LAPIC-originated APIC_DM_FIXED without
+		 * shorthand, and when exactly one recipient was
+		 * delivered; ignore self-IPI.
+		 */
+		if (src &&
+		    irq->delivery_mode == APIC_DM_FIXED &&
+		    irq->shorthand == APIC_DEST_NOSHORT &&
+		    targets == 1 && unique && unique != src->vcpu)
+			kvm_track_ipi_communication(src->vcpu, unique);
 	}
 
 	rcu_read_unlock();
@@ -1366,6 +1429,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 	unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	unsigned int dest_vcpus = 0;
+	/*
+	 * Count actual delivered targets to identify a unique recipient
+	 * for IPI tracking in the slow path.
+	 */
+	int targets = 0;
+	int delivered = 0;
+	struct kvm_vcpu *unique = NULL;
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
 		return r;
@@ -1389,7 +1459,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		if (!kvm_lowest_prio_delivery(irq)) {
 			if (r < 0)
 				r = 0;
-			r += kvm_apic_set_irq(vcpu, irq, dest_map);
+			delivered = kvm_apic_set_irq(vcpu, irq, dest_map);
+			r += delivered;
+			/* Slow path can deliver to multiple vCPUs; count them. */
+			if (delivered > 0) {
+				targets++;
+				unique = vcpu;
+			}
 		} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
 			if (!vector_hashing_enabled) {
 				if (!lowest)
@@ -1410,8 +1486,28 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		lowest = kvm_get_vcpu(kvm, idx);
 	}
 
-	if (lowest)
-		r = kvm_apic_set_irq(lowest, irq, dest_map);
+	if (lowest) {
+		delivered = kvm_apic_set_irq(lowest, irq, dest_map);
+		r = delivered;
+		/*
+		 * Lowest-priority / vector-hashing paths ultimately deliver to
+		 * a single vCPU.
+		 */
+		if (delivered > 0) {
+			targets = 1;
+			unique = lowest;
+		}
+	}
+
+	/*
+	 * Record unique recipient for IPI-aware boost only for LAPIC-
+	 * originated APIC_DM_FIXED without shorthand, and when exactly
+	 * one recipient was delivered; ignore self-IPI.
+	 */
+	if (src && irq->delivery_mode == APIC_DM_FIXED &&
+	    irq->shorthand == APIC_DEST_NOSHORT &&
+	    targets == 1 && unique && unique != src->vcpu)
+		kvm_track_ipi_communication(src->vcpu, unique);
 
 	return r;
 }
@@ -1632,6 +1728,7 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
 	trace_kvm_eoi(apic, vector);
 
 	kvm_ioapic_send_eoi(apic, vector);
+	kvm_clear_ipi_on_eoi(apic);
 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_eoi_accelerated);
@@ -2424,6 +2521,8 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_EOI:
 		apic_set_eoi(apic);
+		/* Precise cleanup for IPI-aware boost */
+		kvm_clear_ipi_on_eoi(apic);
 		break;
 
 	case APIC_LDR:
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ