[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7294A61D-A794-4599-950C-9EC9B5E94B58@infradead.org>
Date: Mon, 29 Dec 2025 11:39:27 +0000
From: David Woodhouse <dwmw2@...radead.org>
To: Khushit Shah <khushit.shah@...anix.com>, seanjc@...gle.com,
pbonzini@...hat.com, kai.huang@...el.com
CC: mingo@...hat.com, x86@...nel.org, bp@...en8.de, hpa@...or.com,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
dave.hansen@...ux.intel.com, tglx@...utronix.de, jon@...anix.com,
shaju.abraham@...anix.com, David Woodhouse <dwmw@...zon.co.uk>
Subject: Re: [PATCH v5 2/3] KVM: x86/ioapic: Implement support for I/O APIC version 0x20 with EOIR
On 29 December 2025 11:17:07 GMT, Khushit Shah <khushit.shah@...anix.com> wrote:
>From: David Woodhouse <dwmw@...zon.co.uk>
>
>Introduce support for I/O APIC version 0x20, which includes the EOI
>Register (EOIR) for directed EOI. The EOI register allows guests to
>perform EOIs to individual I/O APICs instead of relying on broadcast EOIs
>from the local APIC.
>
>When Suppress EOI Broadcast (SEOIB) capability is advertised to the guest,
>guests that enable it will EOI individual I/O APICs by writing to their
>EOI register instead of relying on broadcast EOIs from the LAPIC. Hence,
>when SEOIB is advertised (so that guests can use it if they choose), use
>I/O APIC version 0x20 to provide the EOI register. This prepares for a
>userspace API that will allow explicit control of SEOIB support, providing
>a consistent interface for both in-kernel and split IRQCHIP mode.
>
>Add a tracepoint (kvm_ioapic_directed_eoi) to track directed EOIs for
>debugging and observability.
>
>Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
>Signed-off-by: Khushit Shah <khushit.shah@...anix.com>
>---
> arch/x86/kvm/ioapic.c | 31 +++++++++++++++++++++++++++++--
> arch/x86/kvm/ioapic.h | 19 +++++++++++--------
> arch/x86/kvm/trace.h | 17 +++++++++++++++++
> 3 files changed, 57 insertions(+), 10 deletions(-)
>
>diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>index 6bf8d110aece..eea1eb7845c4 100644
>--- a/arch/x86/kvm/ioapic.c
>+++ b/arch/x86/kvm/ioapic.c
>@@ -48,8 +48,11 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
>
> switch (ioapic->ioregsel) {
> case IOAPIC_REG_VERSION:
>- result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
>- | (IOAPIC_VERSION_ID & 0xff));
>+ if (kvm_lapic_advertise_suppress_eoi_broadcast(ioapic->kvm))
>+ result = IOAPIC_VERSION_ID_EOIR;
>+ else
>+ result = IOAPIC_VERSION_ID;
>+ result |= ((IOAPIC_NUM_PINS - 1) & 0xff) << 16;
I think that wants to depend on _respect_ not _advertise_? Otherwise you're changing existing behaviour in the legacy/quirk case where the VMM neither explicitly enables not disables the feature.
> break;
>
> case IOAPIC_REG_APIC_ID:
>@@ -57,6 +60,10 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
> result = ((ioapic->id & 0xf) << 24);
> break;
>
>+ case IOAPIC_REG_BOOT_CONFIG:
>+ result = 0x01; /* Processor bus */
>+ break;
>+
> default:
> {
> u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
>@@ -701,6 +708,26 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> ioapic_write_indirect(ioapic, data);
> break;
>
>+ case IOAPIC_REG_EOIR:
>+ /*
>+ * The EOI register is supported (and version 0x20 advertised)
>+ * when userspace explicitly enables suppress EOI broadcast.
>+ */
>+ if (kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) {
I'm torn, but I suspect this one should be conditional on _respect_ too. A guest shouldn't be trying this register unless the version register suggests that it exists anyway.
>+ u8 vector = data & 0xff;
>+ int i;
>+
>+ trace_kvm_ioapic_directed_eoi(vcpu, vector);
>+ rtc_irq_eoi(ioapic, vcpu, vector);
>+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
>+ union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
>+
>+ if (ent->fields.vector != vector)
>+ continue;
>+ kvm_ioapic_update_eoi_one(vcpu, ioapic, ent->fields.trig_mode, i);
>+ }
>+ }
>+ break;
> default:
> break;
> }
>diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
>index bf28dbc11ff6..f219577f738c 100644
>--- a/arch/x86/kvm/ioapic.h
>+++ b/arch/x86/kvm/ioapic.h
>@@ -11,7 +11,8 @@ struct kvm_vcpu;
>
> #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
> #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
>-#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
>+#define IOAPIC_VERSION_ID 0x11 /* Default IOAPIC version */
>+#define IOAPIC_VERSION_ID_EOIR 0x20 /* IOAPIC version with EOIR support */
> #define IOAPIC_EDGE_TRIG 0
> #define IOAPIC_LEVEL_TRIG 1
>
>@@ -19,13 +20,15 @@ struct kvm_vcpu;
> #define IOAPIC_MEM_LENGTH 0x100
>
> /* Direct registers. */
>-#define IOAPIC_REG_SELECT 0x00
>-#define IOAPIC_REG_WINDOW 0x10
>-
>-/* Indirect registers. */
>-#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
>-#define IOAPIC_REG_VERSION 0x01
>-#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
>+#define IOAPIC_REG_SELECT 0x00
>+#define IOAPIC_REG_WINDOW 0x10
>+#define IOAPIC_REG_EOIR 0x40 /* version 0x20+ only */
>+
>+/* INDIRECT registers. */
>+#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
>+#define IOAPIC_REG_VERSION 0x01
>+#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
>+#define IOAPIC_REG_BOOT_CONFIG 0x03 /* x86 IOAPIC only */
>
> /*ioapic delivery mode*/
> #define IOAPIC_FIXED 0x0
>diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
>index e79bc9cb7162..6902758353a9 100644
>--- a/arch/x86/kvm/trace.h
>+++ b/arch/x86/kvm/trace.h
>@@ -315,6 +315,23 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
> (__entry->e & (1<<15)) ? "level" : "edge",
> (__entry->e & (1<<16)) ? "|masked" : "")
> );
>+
>+TRACE_EVENT(kvm_ioapic_directed_eoi,
>+ TP_PROTO(struct kvm_vcpu *vcpu, u8 vector),
>+ TP_ARGS(vcpu, vector),
>+
>+ TP_STRUCT__entry(
>+ __field( __u32, apicid )
>+ __field( __u8, vector )
>+ ),
>+
>+ TP_fast_assign(
>+ __entry->apicid = vcpu->vcpu_id;
>+ __entry->vector = vector;
>+ ),
>+
>+ TP_printk("apicid %x vector %u", __entry->apicid, __entry->vector)
>+);
> #endif
>
> TRACE_EVENT(kvm_msi_set_irq,
Powered by blists - more mailing lists