--- 2.6.23-rc1/arch/x86_64/kernel/crash.c.orig 2007-06-05 11:19:07.000000000 +0200 +++ 2.6.23-rc1/arch/x86_64/kernel/crash.c 2007-08-08 18:19:15.000000000 +0200 @@ -18,18 +18,51 @@ #include #include #include +#include #include #include #include #include #include +#include /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; +static atomic_t crash_ipi_stage2 = ATOMIC_INIT(0); + +extern void remove_siblinginfo(int); +extern void remove_cpu_from_maps(void); +extern void crash_mask_IO_APIC(int); + +static void ack_pending_irqs(int cpu) +{ + int i, j, k; + unsigned int value; + printk("APIC ISR %d:", cpu); + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + for (k = 0; k < 100; k++) { + value = apic_read(APIC_ISR + i*0x10); + if (value == 0) + break; + for (j = 31; j >= 0; j--) { + if (value & (1<= 100) + printk("!!!!\n"); + } + printk("\n"); +} static int crash_nmi_callback(struct notifier_block *self, unsigned long val, void *data) @@ -53,13 +86,27 @@ static int crash_nmi_callback(struct not local_irq_disable(); crash_save_cpu(regs, cpu); - disable_local_APIC(); - atomic_dec(&waiting_for_crash_ipi); + disable_APIC_timer(); + atomic_dec(&waiting_for_crash_ipi); + + while(atomic_read(&crash_ipi_stage2) == 0) + cpu_relax(); + + remove_siblinginfo(cpu); + cpu_clear(cpu, cpu_online_map); + remove_cpu_from_maps(); + + ack_pending_irqs(cpu); + + disable_local_APIC(); + atomic_dec(&crash_ipi_stage2); + + /* Assume hlt works */ for(;;) halt(); - return 1; + return NOTIFY_OK; } static void smp_send_nmi_allbutself(void) @@ -79,7 +126,7 @@ static struct notifier_block crash_nmi_n static void nmi_shootdown_cpus(void) { - unsigned long msecs; + unsigned long usecs; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); if (register_die_notifier(&crash_nmi_nb)) @@ -93,19 +140,33 @@ static void nmi_shootdown_cpus(void) smp_send_nmi_allbutself(); + usecs = 1000000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && usecs) { + udelay(1); + usecs--; + } +} + +static void nmi_shootdown_cpus_stage2(void) +{ + + unsigned long msecs; + atomic_set(&crash_ipi_stage2, num_online_cpus()); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + while ((atomic_read(&crash_ipi_stage2) > 1) && msecs) { mdelay(1); msecs--; } - /* Leave the nmi callback set */ - disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) { /* There are no cpus to shootdown */ } +static void nmi_shootdown_cpus_stage2(void) +{ +} #endif void machine_crash_shutdown(struct pt_regs *regs) @@ -121,15 +182,29 @@ void machine_crash_shutdown(struct pt_re */ /* The kernel is broken so disable interrupts */ local_irq_disable(); - /* Make a note of crashing cpu. Will be used in NMI callback.*/ crashing_cpu = smp_processor_id(); + crash_save_cpu(regs, crashing_cpu); + + /* disable timer interrupts */ + disable_irq_nosync(0); + disable_APIC_timer(); + nmi_shootdown_cpus(); + /* Mask all IRQs, and make sure they are delivered to this CPU */ + crash_mask_IO_APIC(crashing_cpu); + nmi_shootdown_cpus_stage2(); + + ack_pending_irqs(crashing_cpu); + if(cpu_has_apic) disable_local_APIC(); + /* Send EOI for pending IRQs */ + /* local_irq_enable(); + udelay(10000); + local_irq_disable(); */ + disable_IO_APIC(); - - crash_save_cpu(regs, smp_processor_id()); }