lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231112041643.2868316-11-jacob.jun.pan@linux.intel.com>
Date:   Sat, 11 Nov 2023 20:16:40 -0800
From:   Jacob Pan <jacob.jun.pan@...ux.intel.com>
To:     LKML <linux-kernel@...r.kernel.org>, X86 Kernel <x86@...nel.org>,
        iommu@...ts.linux.dev, Thomas Gleixner <tglx@...utronix.de>,
        "Lu Baolu" <baolu.lu@...ux.intel.com>, kvm@...r.kernel.org,
        Dave Hansen <dave.hansen@...el.com>,
        Joerg Roedel <joro@...tes.org>,
        "H. Peter Anvin" <hpa@...or.com>, "Borislav Petkov" <bp@...en8.de>,
        "Ingo Molnar" <mingo@...hat.com>
Cc:     Raj Ashok <ashok.raj@...el.com>,
        "Tian, Kevin" <kevin.tian@...el.com>, maz@...nel.org,
        peterz@...radead.org, seanjc@...gle.com,
        "Robin Murphy" <robin.murphy@....com>,
        Jacob Pan <jacob.jun.pan@...ux.intel.com>
Subject: [PATCH RFC 10/13] x86/irq: Handle potential lost IRQ during migration and CPU offline

Though the IRTE modification for an IRQ affinity change is an atomic
operation, it does not guarantee the timing of IRQ posting at the PID.

Consider the following scenario:
	Device		system agent		iommu		memory 		CPU/LAPIC
1	FEEX_XXXX
2			Interrupt request
3						Fetch IRTE	->
4						->Atomic Swap PID.PIR(vec)
						Push to Global Observable(GO)
5						if (ON*)
	i						done;*
						else
6							send a notification ->

* ON: outstanding notification, 1 will suppress new notifications

If an IRQ affinity change happens between steps 3 and 5 in the IOMMU, the
old CPU's PIR could have the pending bit set for the vector being moved.
We must check PID.PIR to prevent the loss of interrupts.

Suggested-by: Thomas Gleixner <tglx@...utronix.de>
Signed-off-by: Jacob Pan <jacob.jun.pan@...ux.intel.com>
---
 arch/x86/kernel/apic/vector.c |  8 +++++++-
 arch/x86/kernel/irq.c         | 20 +++++++++++++++++---
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 319448d87b99..14fc33cfdb37 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -19,6 +19,7 @@
 #include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/desc.h>
+#include <asm/posted_intr.h>
 #include <asm/irq_remapping.h>
 
 #include <asm/trace/irq_vectors.h>
@@ -978,9 +979,14 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 		 * Do not check IRR when called from lapic_offline(), because
 		 * fixup_irqs() was just called to scan IRR for set bits and
 		 * forward them to new destination CPUs via IPIs.
+		 *
+		 * If the vector to be cleaned is delivered as posted intr,
+		 * it is possible that the interrupt has been posted but
+		 * not made it to the IRR due to coalesced notifications.
+		 * Therefore, check PIR to see if the interrupt was posted.
 		 */
 		irr = check_irr ? apic_read(APIC_IRR + (vector / 32 * 0x10)) : 0;
-		if (irr & (1U << (vector % 32))) {
+		if (irr & (1U << (vector % 32)) || is_pi_pending_this_cpu(vector)) {
 			pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
 			rearm = true;
 			continue;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 786c2c8330f4..7732cb9bbf0c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -444,11 +444,26 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 }
 #endif /* X86_POSTED_MSI */
 
+/*
+ * Check if a given vector is pending in APIC IRR or PIR if posted interrupt
+ * is enabled for coalesced interrupt delivery (CID).
+ */
+static inline bool is_vector_pending(unsigned int vector)
+{
+	unsigned int irr;
+
+	irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+	if (irr  & (1 << (vector % 32)))
+		return true;
+
+	return is_pi_pending_this_cpu(vector);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
 void fixup_irqs(void)
 {
-	unsigned int irr, vector;
+	unsigned int vector;
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
@@ -475,8 +490,7 @@ void fixup_irqs(void)
 		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
 			continue;
 
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		if (irr  & (1 << (vector % 32))) {
+		if (is_vector_pending(vector)) {
 			desc = __this_cpu_read(vector_irq[vector]);
 
 			raw_spin_lock(&desc->lock);
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ