lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <4FA1E527.1090807@gmail.com>
Date:	Thu, 03 May 2012 09:53:43 +0800
From:	Wang Sheng-Hui <shhuiw@...il.com>
To:	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Milton Miller <miltonm@....com>,
	Grant Likely <grant.likely@...retlab.ca>,
	Stephen Rothwell <sfr@...b.auug.org.au>,
	Anton Blanchard <anton@...ba.org>,
	linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org
Subject: [PATCH] powerpc: use local var instead of local_paca->irq_happened
 directly in __check_irq_replay

local_paca->irq_happened may be changed asynchronously.

In my test environment (IBM Power 9117-MMA), I installed RHEL 6.2 with its
shipped oprofile. I then booted into kernel v3.4-rc4, set up and started
oprofile, and started the LTP test suite.

After a short while, the system crashes. It seems that oprofile may change
irq_happened.
======================================================================
      KERNEL: /boot/vmlinux-3.4.0-rc4-00104-gaf3a3ab
    DUMPFILE: vmcore  [PARTIAL DUMP]
        CPUS: 10
        DATE: Fri Apr 27 18:54:34 2012
      UPTIME: 00:02:34
LOAD AVERAGE: 0.60, 0.27, 0.10
       TASKS: 369
    NODENAME: feastlp3.upt.austin.ibm.com
     RELEASE: 3.4.0-rc4-00104-gaf3a3ab
     VERSION: #4 SMP Fri Apr 27 03:13:43 CDT 2012
     MACHINE: ppc64  (4704 Mhz)
      MEMORY: 9.8 GB
       PANIC: "kernel BUG at /usr/src/kernels/linux/arch/powerpc/kernel/irq.c:188!"
         PID: 0
     COMMAND: "swapper/4"
        TASK: c0000002694e3cc0  (1 of 10)  [THREAD_INFO: c0000002694f8000]
         CPU: 4
       STATE: TASK_RUNNING (PANIC)

crash> bt
PID: 0      TASK: c0000002694e3cc0  CPU: 4   COMMAND: "swapper/4"
 #0 [c00000026ffcb6e0] .crash_kexec at c0000000000f22e8
 #1 [c00000026ffcb8e0] .oops_end at c00000000060aed8
 #2 [c00000026ffcb980] ._exception at c000000000020900
 #3 [c00000026ffcbb40] program_check_common at c0000000000053b4
 Breakpoint trap  [700] exception frame:
 R0:  0000000000000001    R1:  c00000026ffcbe30    R2:  c000000000edd170   
 R3:  0000000000000500    R4:  0000000000000000    R5:  00000000000007fd   
 R6:  000000000124a180    R7:  003450cf9bd1233b    R8:  0000000000940000   
 R9:  c000000003400c00    R10: 0000000000000001    R11: 0000000000000000   
 R12: 0000000000000002    R13: c000000003400c00    R14: c0000002694fbf90   
 R15: 0000000002000040    R16: 0000000000000004    R17: 0000000000000000   
 R18: 0000000000000000    R19: 0000000000000000    R20: c000000000f42100   
 R21: 0000000000000000    R22: c000000000955b80    R23: c000000000955b80   
 R24: 000000000000000a    R25: 0000000000000004    R26: c0000002694f8100   
 R27: c00000026ffc8000    R28: 0000000000000000    R29: c000000000f42100   
 R30: c000000000e60810    R31: 0000000000000040   
 NIP: c00000000000ea9c    MSR: 8000000000029032    OR3: c00000000000ea3c
 CTR: c000000000063e40    LR:  c000000000010578    XER: 0000000000000000
 CCR: 0000000028000048    MQ:  0000000000000000    DAR: c000000001295d00
 DSISR: 0000000000000000     Syscall Result: 0000000000000000

 #4 [c00000026ffcbe30] .__check_irq_replay at c00000000000ea9c
 [Link Register ]  [c00000026ffcbe30] .arch_local_irq_restore at c000000000010578
 #5 [c00000026ffcbea0] .__do_softirq at c000000000085724
 #6 [c00000026ffcbf90] .call_do_softirq at c000000000022928
 #7 [c0000002694fb8d0] .do_softirq at c0000000000106c8
 #8 [c0000002694fb970] .irq_exit at c000000000085414
 #9 [c0000002694fb9f0] .do_IRQ at c0000000000100a4
#10 [c0000002694fbab0] hardware_interrupt_common at c0000000000038c0
 Hardware Interrupt  [501] exception frame:
 R0:  0000000000000001    R1:  c0000002694fbda0    R2:  c000000000edd170   
 R3:  0000000000000000    R4:  0000000000000000    R5:  0000000000000000   
 R6:  00000000000000e0    R7:  003450cf9bd1233b    R8:  0000000000940000   
 R9:  ffffffffffffffff    R10: 0000000000243694    R11: 0000000000000001   
 R12: 0000000000000002    R13: c000000003400c00   
 NIP: c0000000000105b4    MSR: 8000000000009032    OR3: 0000000000000c00
 CTR: c0000000004de3a0    LR:  c0000000000105b4    XER: 0000000000000000
 CCR: 0000000044000044    MQ:  0000000000000001    DAR: c0000000012990b0
 DSISR: c0000002694fbce0     Syscall Result: 0000000000000000

#11 [c0000002694fbda0] .arch_local_irq_restore at c0000000000105b4  (unreliable)
#12 [c0000002694fbe10] .cpu_idle at c000000000017d20
#13 [c0000002694fbed0] .start_secondary at c00000000061a934
#14 [c0000002694fbf90] .start_secondary_prolog at c00000000000936c


Use a local variable instead of accessing local_paca->irq_happened directly in this function.

Please check this patch. Any comments are welcome.


Signed-off-by: Wang Sheng-Hui <shhuiw@...il.com>
---
 arch/powerpc/kernel/irq.c |   46 +++++++++++++++++++++++++++++---------------
 1 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5ec1b23..3d48b23 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -137,15 +137,17 @@ static inline notrace int decrementer_check_overflow(void)
  */
 notrace unsigned int __check_irq_replay(void)
 {
+	unsigned int ret_val;
 	/*
 	 * We use local_paca rather than get_paca() to avoid all
 	 * the debug_smp_processor_id() business in this low level
 	 * function
 	 */
-	unsigned char happened = local_paca->irq_happened;
+	unsigned char happened, irq_happened;
+	happened = irq_happened = local_paca->irq_happened;
 
 	/* Clear bit 0 which we wouldn't clear otherwise */
-	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+	irq_happened &= ~PACA_IRQ_HARD_DIS;
 
 	/*
 	 * Force the delivery of pending soft-disabled interrupts on PS3.
@@ -161,33 +163,45 @@ notrace unsigned int __check_irq_replay(void)
 	 * decrementer itself rather than the paca irq_happened field
 	 * in case we also had a rollover while hard disabled
 	 */
-	local_paca->irq_happened &= ~PACA_IRQ_DEC;
-	if (decrementer_check_overflow())
-		return 0x900;
+	irq_happened &= ~PACA_IRQ_DEC;
+	if (decrementer_check_overflow()) {
+		ret_val = 0x900;
+		goto replay;
+	}
 
 	/* Finally check if an external interrupt happened */
-	local_paca->irq_happened &= ~PACA_IRQ_EE;
-	if (happened & PACA_IRQ_EE)
-		return 0x500;
+	irq_happened &= ~PACA_IRQ_EE;
+	if (happened & PACA_IRQ_EE) {
+		ret_val = 0x500;
+		goto replay;
+	}
 
 #ifdef CONFIG_PPC_BOOK3E
 	/* Finally check if an EPR external interrupt happened
 	 * this bit is typically set if we need to handle another
 	 * "edge" interrupt from within the MPIC "EPR" handler
 	 */
-	local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
-	if (happened & PACA_IRQ_EE_EDGE)
-		return 0x500;
+	irq_happened &= ~PACA_IRQ_EE_EDGE;
+	if (happened & PACA_IRQ_EE_EDGE) {
+		ret_val = 0x500;
+		goto replay;
+	}
 
-	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-	if (happened & PACA_IRQ_DBELL)
-		return 0x280;
+	irq_happened &= ~PACA_IRQ_DBELL;
+	if (happened & PACA_IRQ_DBELL) {
+		ret_val = 0x280;
+		goto replay;
+	}
 #endif /* CONFIG_PPC_BOOK3E */
 
 	/* There should be nothing left ! */
-	BUG_ON(local_paca->irq_happened != 0);
+	BUG_ON(irq_happened != 0);
+	ret_val = 0;
 
-	return 0;
+replay:
+	local_paca->irq_happened = irq_happened;
+
+	return ret_val;
 }
 
 notrace void arch_local_irq_restore(unsigned long en)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ