Date:   Fri, 24 Jan 2020 15:34:43 +0100
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Evan Green <evgreen@...omium.org>
Cc:     Rajat Jain <rajatja@...gle.com>,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        linux-pci <linux-pci@...r.kernel.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        x86@...nel.org
Subject: Re: [PATCH v2] PCI/MSI: Avoid torn updates to MSI pairs

Evan,

Evan Green <evgreen@...omium.org> writes:
> I did another experiment that I think lends credibility to my torn MSI
> hypothesis. I have the following change:
>
> And indeed, I get a machine check, despite the fact that MSI_DATA is
> overwritten just after address is updated.
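
To spell out why the tear can happen at all: the address and data
halves of the message are updated with two separate config space
writes, and without a mask bit there is no way to keep the device from
sending a message in between. Roughly, modeled on the 32-bit path of
__pci_write_msi_msg():

	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo);
	/*
	 * The device can raise an interrupt right here, using the NEW
	 * address, i.e. the new target CPU, combined with the OLD
	 * data, i.e. the old vector.
	 */
	pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data);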

I shouldn't have to understand why a SoC released in 2019 still has
unmaskable MSI, especially as Inhell's own XHCI spec clearly documents
and recommends MSI-X.

While your workaround (disabling MSI) works in this particular case,
it's not really a good option:

 1) Quite a few devices have a bug where the legacy INTX disable does
    not work reliably or is outright broken. That means disabling MSI
    will reroute the interrupt to INTX (see the sketch below this
    list).

 2) I dug out old debug data which confirms that some silly devices
    lose interrupts across MSI disable/re-enable if the INTX fallback
    is disabled.

    And no, it's not some random weird device; it's part of a chipset
    which was pretty popular a few years ago. I leave it as an exercise
    for the reader to guess the vendor.
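
For reference, the INTX disable in 1) boils down to flipping a single
bit in the PCI command register, roughly what pci_intx() ends up
doing; on the buggy devices that bit is simply not honored:

	u16 cmd;

	pci_read_config_word(dev, PCI_COMMAND, &cmd);
	pci_write_config_word(dev, PCI_COMMAND, cmd | PCI_COMMAND_INTX_DISABLE);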

Can you please apply the patch below? It enforces an IPI to the new
vector/target CPU when the interrupt is MSI w/o masking. It should
cure the issue. It goes without saying that I'm not proud of it.
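
Enforcing the IPI means the hotplug fixup invokes the vector domain's
irq_retrigger() callback unconditionally for such interrupts. That
callback, roughly as implemented by apic_retrigger_irq() in
arch/x86/kernel/apic/vector.c, just fires the vector on the target
CPU:

	static int apic_retrigger_irq(struct irq_data *irqd)
	{
		struct apic_chip_data *apicd = apic_chip_data(irqd);
		unsigned long flags;

		raw_spin_lock_irqsave(&vector_lock, flags);
		apic->send_IPI(apicd->cpu, apicd->vector);
		raw_spin_unlock_irqrestore(&vector_lock, flags);
		return 1;
	}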

Thanks,

        tglx

8<--------------

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -498,6 +498,7 @@ extern bool default_check_apicid_used(ph
 extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
 extern int default_cpu_present_to_apicid(int mps_cpu);
 extern int default_check_phys_apicid_present(int phys_apicid);
+extern bool apic_hotplug_force_retrigger(struct irq_data *irqd);
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -10,6 +10,7 @@ enum {
 	/* Allocate contiguous CPU vectors */
 	X86_IRQ_ALLOC_CONTIGUOUS_VECTORS		= 0x1,
 	X86_IRQ_ALLOC_LEGACY				= 0x2,
+	X86_IRQ_MSI_NOMASK_TRAINWRECK			= 0x4,
 };
 
 extern struct irq_domain *x86_vector_domain;
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -103,6 +103,14 @@ int pci_msi_prepare(struct irq_domain *d
 	} else {
 		arg->type = X86_IRQ_ALLOC_TYPE_MSI;
 		arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
+		/*
+		 * If the MSI implementation does not provide masking
+		 * enable the workaround for the CPU hotplug forced
+		 * migration problem which is caused by the torn write of
+		 * the address/data pair.
+		 */
+		if (!desc->msi_attrib.maskbit)
+			arg->flags |= X86_IRQ_MSI_NOMASK_TRAINWRECK;
 	}
 
 	return 0;
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -34,7 +34,8 @@ struct apic_chip_data {
 	unsigned int		move_in_progress	: 1,
 				is_managed		: 1,
 				can_reserve		: 1,
-				has_reserved		: 1;
+				has_reserved		: 1,
+				force_retrigger		: 1;
 };
 
 struct irq_domain *x86_vector_domain;
@@ -99,6 +100,18 @@ struct irq_cfg *irq_cfg(unsigned int irq
 	return irqd_cfg(irq_get_irq_data(irq));
 }
 
+bool apic_hotplug_force_retrigger(struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd;
+
+	irqd = __irq_domain_get_irq_data(x86_vector_domain, irqd);
+	if (!irqd)
+		return false;
+
+	apicd = apic_chip_data(irqd);
+	return apicd && apicd->force_retrigger;
+}
+
 static struct apic_chip_data *alloc_apic_chip_data(int node)
 {
 	struct apic_chip_data *apicd;
@@ -552,6 +565,8 @@ static int x86_vector_alloc_irqs(struct
 		}
 
 		apicd->irq = virq + i;
+		if (info->flags & X86_IRQ_MSI_NOMASK_TRAINWRECK)
+			apicd->force_retrigger = true;
 		irqd->chip = &lapic_controller;
 		irqd->chip_data = apicd;
 		irqd->hwirq = virq + i;
@@ -624,6 +639,7 @@ static void x86_vector_debug_show(struct
 	seq_printf(m, "%*scan_reserve:      %u\n", ind, "", apicd.can_reserve ? 1 : 0);
 	seq_printf(m, "%*shas_reserved:     %u\n", ind, "", apicd.has_reserved ? 1 : 0);
 	seq_printf(m, "%*scleanup_pending:  %u\n", ind, "", !hlist_unhashed(&apicd.clist));
+	seq_printf(m, "%*sforce_retrigger:  %u\n", ind, "", apicd.force_retrigger ? 1 : 0);
 }
 #endif
 
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -350,6 +350,7 @@ void fixup_irqs(void)
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
+	bool retrigger;
 
 	irq_migrate_all_off_this_cpu();
 
@@ -370,24 +371,29 @@ void fixup_irqs(void)
 	 * nothing else will touch it.
 	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
+		desc = __this_cpu_read(vector_irq[vector]);
+		if (IS_ERR_OR_NULL(desc))
 			continue;
 
+		raw_spin_lock(&desc->lock);
+		data = irq_desc_get_irq_data(desc);
+		retrigger = apic_hotplug_force_retrigger(data);
+
 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		if (irr  & (1 << (vector % 32))) {
-			desc = __this_cpu_read(vector_irq[vector]);
+		if (irr  & (1 << (vector % 32)))
+			retrigger = true;
 
-			raw_spin_lock(&desc->lock);
-			data = irq_desc_get_irq_data(desc);
+		if (!retrigger) {
+			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		} else {
 			chip = irq_data_get_irq_chip(data);
 			if (chip->irq_retrigger) {
 				chip->irq_retrigger(data);
-				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
+				__this_cpu_write(vector_irq[vector],
+						 VECTOR_RETRIGGERED);
 			}
-			raw_spin_unlock(&desc->lock);
 		}
-		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
-			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		raw_spin_unlock(&desc->lock);
 	}
 }
 #endif
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -432,6 +432,8 @@ int irq_reserve_ipi(struct irq_domain *d
 int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
 
 /* V2 interfaces to support hierarchy IRQ domains. */
+struct irq_data *__irq_domain_get_irq_data(struct irq_domain *domain,
+					   struct irq_data *irq_data);
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
 extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1165,6 +1165,21 @@ static int irq_domain_alloc_irq_data(str
 }
 
 /**
+ * __irq_domain_get_irq_data - Get irq_data associated with @domain for @data
+ * @domain:	domain to match
+ * @irq_data:	initial irq data to start hierarchy search
+ */
+struct irq_data *__irq_domain_get_irq_data(struct irq_domain *domain,
+					   struct irq_data *irq_data)
+{
+	for (; irq_data; irq_data = irq_data->parent_data) {
+		if (irq_data->domain == domain)
+			return irq_data;
+	}
+	return NULL;
+}
+
+/**
  * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
  * @domain:	domain to match
  * @virq:	IRQ number to get irq_data
@@ -1172,14 +1187,7 @@ static int irq_domain_alloc_irq_data(str
 struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 					 unsigned int virq)
 {
-	struct irq_data *irq_data;
-
-	for (irq_data = irq_get_irq_data(virq); irq_data;
-	     irq_data = irq_data->parent_data)
-		if (irq_data->domain == domain)
-			return irq_data;
-
-	return NULL;
+	return __irq_domain_get_irq_data(domain, irq_get_irq_data(virq));
 }
 EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
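
For verification: with CONFIG_GENERIC_IRQ_DEBUGFS=y the new flag shows
up in the per interrupt debugfs file, so something like the following
should be visible for an affected MSI interrupt:

	# cat /sys/kernel/debug/irq/irqs/<N>
	...
	force_retrigger:  1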
 
