lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87eevqkpgn.fsf@nanos.tec.linutronix.de>
Date:   Thu, 23 Jan 2020 19:16:56 +0100
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Evan Green <evgreen@...omium.org>, Rajat Jain <rajatja@...gle.com>
Cc:     Bjorn Helgaas <bhelgaas@...gle.com>,
        linux-pci <linux-pci@...r.kernel.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2] PCI/MSI: Avoid torn updates to MSI pairs

Evan,

Thomas Gleixner <tglx@...utronix.de> writes:
> This is not yet debugged fully and as this is happening on MSI-X I'm not
> really convinced yet that your 'torn write' theory holds.

can you please apply the debug patch below and run your test. When the
failure happens, stop the tracer and collect the trace.

Another question. Did you ever try to change the affinity of that
interrupt without hotplug rapidly while the device makes traffic? If
not, it would be interesting whether this leads to a failure as well.

Thanks

        tglx

8<---------------

--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -964,6 +964,8 @@ void irq_force_complete_move(struct irq_
 	if (!vector)
 		goto unlock;
 
+	trace_printk("IRQ %u vector %u irq inprogress %u\n", vector,
+		     irqd->irq, apicd->move_in_progress);
 	/*
 	 * This is tricky. If the cleanup of the old vector has not been
 	 * done yet, then the following setaffinity call will fail with
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -244,6 +244,8 @@ u64 arch_irq_stat(void)
 
 	desc = __this_cpu_read(vector_irq[vector]);
 	if (likely(!IS_ERR_OR_NULL(desc))) {
+		trace_printk("Handle vector %u IRQ %u\n", vector,
+			     desc->irq_data.irq);
 		if (IS_ENABLED(CONFIG_X86_32))
 			handle_irq(desc, regs);
 		else
@@ -252,10 +254,18 @@ u64 arch_irq_stat(void)
 		ack_APIC_irq();
 
 		if (desc == VECTOR_UNUSED) {
+			trace_printk("Handle unused vector %u\n", vector);
 			pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
 					     __func__, smp_processor_id(),
 					     vector);
 		} else {
+			if (desc == VECTOR_SHUTDOWN) {
+				trace_printk("Handle shutdown vector %u\n",
+					     vector);
+			} else if (desc == VECTOR_RETRIGGERED) {
+				trace_printk("Handle retriggered vector %u\n",
+					     vector);
+			}
 			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
 		}
 	}
@@ -373,9 +383,14 @@ void fixup_irqs(void)
 		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
 			continue;
 
+		desc = __this_cpu_read(vector_irq[vector]);
+		trace_printk("FIXUP: %u\n", desc->irq_data.irq);
+
 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
 		if (irr  & (1 << (vector % 32))) {
 			desc = __this_cpu_read(vector_irq[vector]);
+			trace_printk("FIXUP: %u IRR pending\n",
+				     desc->irq_data.irq);
 
 			raw_spin_lock(&desc->lock);
 			data = irq_desc_get_irq_data(desc);
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -122,6 +122,10 @@ static bool migrate_one_irq(struct irq_d
 		affinity = cpu_online_mask;
 		brokeaff = true;
 	}
+
+	trace_printk("IRQ: %d maskchip %d wasmasked %d break %d\n",
+		     d->irq, maskchip, irqd_irq_masked(d), brokeaff);
+
 	/*
 	 * Do not set the force argument of irq_do_set_affinity() as this
 	 * disables the masking of offline CPUs from the supplied affinity

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ