[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251229111708.59402-3-khushit.shah@nutanix.com>
Date: Mon, 29 Dec 2025 11:17:07 +0000
From: Khushit Shah <khushit.shah@...anix.com>
To: seanjc@...gle.com, pbonzini@...hat.com, kai.huang@...el.com,
dwmw2@...radead.org
Cc: mingo@...hat.com, x86@...nel.org, bp@...en8.de, hpa@...or.com,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
dave.hansen@...ux.intel.com, tglx@...utronix.de, jon@...anix.com,
shaju.abraham@...anix.com, David Woodhouse <dwmw@...zon.co.uk>,
Khushit Shah <khushit.shah@...anix.com>
Subject: [PATCH v5 2/3] KVM: x86/ioapic: Implement support for I/O APIC version 0x20 with EOIR
From: David Woodhouse <dwmw@...zon.co.uk>
Introduce support for I/O APIC version 0x20, which includes the EOI
Register (EOIR) for directed EOI. The EOI register allows guests to
perform EOIs to individual I/O APICs instead of relying on broadcast EOIs
from the local APIC.
When Suppress EOI Broadcast (SEOIB) capability is advertised to the guest,
guests that enable it will EOI individual I/O APICs by writing to their
EOI register instead of relying on broadcast EOIs from the LAPIC. Hence,
when SEOIB is advertised (so that guests can use it if they choose), use
I/O APIC version 0x20 to provide the EOI register. This prepares for a
userspace API that will allow explicit control of SEOIB support, providing
a consistent interface for both in-kernel and split IRQCHIP mode.
Add a tracepoint (kvm_ioapic_directed_eoi) to track directed EOIs for
debugging and observability.
Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
Signed-off-by: Khushit Shah <khushit.shah@...anix.com>
---
arch/x86/kvm/ioapic.c | 31 +++++++++++++++++++++++++++++--
arch/x86/kvm/ioapic.h | 19 +++++++++++--------
arch/x86/kvm/trace.h | 17 +++++++++++++++++
3 files changed, 57 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6bf8d110aece..eea1eb7845c4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -48,8 +48,11 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
- result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
- | (IOAPIC_VERSION_ID & 0xff));
+ if (kvm_lapic_advertise_suppress_eoi_broadcast(ioapic->kvm))
+ result = IOAPIC_VERSION_ID_EOIR;
+ else
+ result = IOAPIC_VERSION_ID;
+ result |= ((IOAPIC_NUM_PINS - 1) & 0xff) << 16;
break;
case IOAPIC_REG_APIC_ID:
@@ -57,6 +60,10 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
result = ((ioapic->id & 0xf) << 24);
break;
+ case IOAPIC_REG_BOOT_CONFIG:
+ result = 0x01; /* Processor bus */
+ break;
+
default:
{
u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
@@ -701,6 +708,26 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
ioapic_write_indirect(ioapic, data);
break;
+ case IOAPIC_REG_EOIR:
+ /*
+ * The EOI register is supported (and version 0x20 advertised)
+ * when userspace explicitly enables suppress EOI broadcast.
+ */
+ if (kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) {
+ u8 vector = data & 0xff;
+ int i;
+
+ trace_kvm_ioapic_directed_eoi(vcpu, vector);
+ rtc_irq_eoi(ioapic, vcpu, vector);
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+ if (ent->fields.vector != vector)
+ continue;
+ kvm_ioapic_update_eoi_one(vcpu, ioapic, ent->fields.trig_mode, i);
+ }
+ }
+ break;
default:
break;
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index bf28dbc11ff6..f219577f738c 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -11,7 +11,8 @@ struct kvm_vcpu;
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
-#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_VERSION_ID 0x11 /* Default IOAPIC version */
+#define IOAPIC_VERSION_ID_EOIR 0x20 /* IOAPIC version with EOIR support */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
@@ -19,13 +20,15 @@ struct kvm_vcpu;
#define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */
-#define IOAPIC_REG_SELECT 0x00
-#define IOAPIC_REG_WINDOW 0x10
-
-/* Indirect registers. */
-#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
-#define IOAPIC_REG_VERSION 0x01
-#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
+#define IOAPIC_REG_SELECT 0x00
+#define IOAPIC_REG_WINDOW 0x10
+#define IOAPIC_REG_EOIR 0x40 /* version 0x20+ only */
+
+/* INDIRECT registers. */
+#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
+#define IOAPIC_REG_BOOT_CONFIG 0x03 /* x86 IOAPIC only */
/*ioapic delivery mode*/
#define IOAPIC_FIXED 0x0
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e79bc9cb7162..6902758353a9 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -315,6 +315,23 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
(__entry->e & (1<<15)) ? "level" : "edge",
(__entry->e & (1<<16)) ? "|masked" : "")
);
+
+TRACE_EVENT(kvm_ioapic_directed_eoi,
+ TP_PROTO(struct kvm_vcpu *vcpu, u8 vector),
+ TP_ARGS(vcpu, vector),
+
+ TP_STRUCT__entry(
+ __field( __u32, apicid )
+ __field( __u8, vector )
+ ),
+
+ TP_fast_assign(
+ __entry->apicid = vcpu->vcpu_id;
+ __entry->vector = vector;
+ ),
+
+ TP_printk("apicid %x vector %u", __entry->apicid, __entry->vector)
+);
#endif
TRACE_EVENT(kvm_msi_set_irq,
--
2.39.3
Powered by blists - more mailing lists