lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7294A61D-A794-4599-950C-9EC9B5E94B58@infradead.org>
Date: Mon, 29 Dec 2025 11:39:27 +0000
From: David Woodhouse <dwmw2@...radead.org>
To: Khushit Shah <khushit.shah@...anix.com>, seanjc@...gle.com,
 pbonzini@...hat.com, kai.huang@...el.com
CC: mingo@...hat.com, x86@...nel.org, bp@...en8.de, hpa@...or.com,
 linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
 dave.hansen@...ux.intel.com, tglx@...utronix.de, jon@...anix.com,
 shaju.abraham@...anix.com, David Woodhouse <dwmw@...zon.co.uk>
Subject: Re: [PATCH v5 2/3] KVM: x86/ioapic: Implement support for I/O APIC version 0x20 with EOIR

On 29 December 2025 11:17:07 GMT, Khushit Shah <khushit.shah@...anix.com> wrote:
>From: David Woodhouse <dwmw@...zon.co.uk>
>
>Introduce support for I/O APIC version 0x20, which includes the EOI
>Register (EOIR) for directed EOI.  The EOI register allows guests to
>perform EOIs to individual I/O APICs instead of relying on broadcast EOIs
>from the local APIC.
>
>When Suppress EOI Broadcast (SEOIB) capability is advertised to the guest,
>guests that enable it will EOI individual I/O APICs by writing to their
>EOI register instead of relying on broadcast EOIs from the LAPIC.  Hence,
>when SEOIB is advertised (so that guests can use it if they choose), use
>I/O APIC version 0x20 to provide the EOI register.  This prepares for a
>userspace API that will allow explicit control of SEOIB support, providing
>a consistent interface for both in-kernel and split IRQCHIP mode.
>
>Add a tracepoint (kvm_ioapic_directed_eoi) to track directed EOIs for
>debugging and observability.
>
>Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
>Signed-off-by: Khushit Shah <khushit.shah@...anix.com>
>---
> arch/x86/kvm/ioapic.c | 31 +++++++++++++++++++++++++++++--
> arch/x86/kvm/ioapic.h | 19 +++++++++++--------
> arch/x86/kvm/trace.h  | 17 +++++++++++++++++
> 3 files changed, 57 insertions(+), 10 deletions(-)
>
>diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>index 6bf8d110aece..eea1eb7845c4 100644
>--- a/arch/x86/kvm/ioapic.c
>+++ b/arch/x86/kvm/ioapic.c
>@@ -48,8 +48,11 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
> 
> 	switch (ioapic->ioregsel) {
> 	case IOAPIC_REG_VERSION:
>-		result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
>-			  | (IOAPIC_VERSION_ID & 0xff));
>+		if (kvm_lapic_advertise_suppress_eoi_broadcast(ioapic->kvm))
>+			result = IOAPIC_VERSION_ID_EOIR;
>+		else
>+			result = IOAPIC_VERSION_ID;
>+		result |= ((IOAPIC_NUM_PINS - 1) & 0xff) << 16;

I think that wants to depend on _respect_ not _advertise_? Otherwise you're changing existing behaviour in the legacy/quirk case where the VMM neither explicitly enables not disables the feature.

> 		break;
> 
> 	case IOAPIC_REG_APIC_ID:
>@@ -57,6 +60,10 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
> 		result = ((ioapic->id & 0xf) << 24);
> 		break;
> 
>+	case IOAPIC_REG_BOOT_CONFIG:
>+		result = 0x01; /* Processor bus */
>+		break;
>+
> 	default:
> 		{
> 			u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
>@@ -701,6 +708,26 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> 		ioapic_write_indirect(ioapic, data);
> 		break;
> 
>+	case IOAPIC_REG_EOIR:
>+		/*
>+		 * The EOI register is supported (and version 0x20 advertised)
>+		 * when userspace explicitly enables suppress EOI broadcast.
>+		 */
>+		if (kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) {

I'm torn, but I suspect this one should be conditional on _respect_ too. A guest shouldn't be trying this register unless the version register suggests that it exists anyway.


>+			u8 vector = data & 0xff;
>+			int i;
>+
>+			trace_kvm_ioapic_directed_eoi(vcpu, vector);
>+			rtc_irq_eoi(ioapic, vcpu, vector);
>+			for (i = 0; i < IOAPIC_NUM_PINS; i++) {
>+				union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
>+
>+				if (ent->fields.vector != vector)
>+					continue;
>+				kvm_ioapic_update_eoi_one(vcpu, ioapic, ent->fields.trig_mode, i);
>+			}
>+		}
>+		break;
> 	default:
> 		break;
> 	}
>diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
>index bf28dbc11ff6..f219577f738c 100644
>--- a/arch/x86/kvm/ioapic.h
>+++ b/arch/x86/kvm/ioapic.h
>@@ -11,7 +11,8 @@ struct kvm_vcpu;
> 
> #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
> #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
>-#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
>+#define IOAPIC_VERSION_ID	0x11	/* Default IOAPIC version */
>+#define IOAPIC_VERSION_ID_EOIR	0x20	/* IOAPIC version with EOIR support */
> #define IOAPIC_EDGE_TRIG  0
> #define IOAPIC_LEVEL_TRIG 1
> 
>@@ -19,13 +20,15 @@ struct kvm_vcpu;
> #define IOAPIC_MEM_LENGTH            0x100
> 
> /* Direct registers. */
>-#define IOAPIC_REG_SELECT  0x00
>-#define IOAPIC_REG_WINDOW  0x10
>-
>-/* Indirect registers. */
>-#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
>-#define IOAPIC_REG_VERSION 0x01
>-#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
>+#define IOAPIC_REG_SELECT	0x00
>+#define IOAPIC_REG_WINDOW	0x10
>+#define IOAPIC_REG_EOIR	0x40	/* version 0x20+ only */
>+
>+/* INDIRECT registers. */
>+#define IOAPIC_REG_APIC_ID	0x00	/* x86 IOAPIC only */
>+#define IOAPIC_REG_VERSION	0x01
>+#define IOAPIC_REG_ARB_ID	0x02	/* x86 IOAPIC only */
>+#define IOAPIC_REG_BOOT_CONFIG	0x03	/* x86 IOAPIC only */
> 
> /*ioapic delivery mode*/
> #define	IOAPIC_FIXED			0x0
>diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
>index e79bc9cb7162..6902758353a9 100644
>--- a/arch/x86/kvm/trace.h
>+++ b/arch/x86/kvm/trace.h
>@@ -315,6 +315,23 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
> 		  (__entry->e & (1<<15)) ? "level" : "edge",
> 		  (__entry->e & (1<<16)) ? "|masked" : "")
> );
>+
>+TRACE_EVENT(kvm_ioapic_directed_eoi,
>+	    TP_PROTO(struct kvm_vcpu *vcpu, u8 vector),
>+	    TP_ARGS(vcpu, vector),
>+
>+	TP_STRUCT__entry(
>+		__field(	__u32,		apicid		)
>+		__field(	__u8,		vector		)
>+	),
>+
>+	TP_fast_assign(
>+		__entry->apicid		= vcpu->vcpu_id;
>+		__entry->vector		= vector;
>+	),
>+
>+	TP_printk("apicid %x vector %u", __entry->apicid, __entry->vector)
>+);
> #endif
> 
> TRACE_EVENT(kvm_msi_set_irq,


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ