To make sure that a crashed irq thread does not cause more trouble when the irq code tries to wake up a gone thread, or when the device code calls free_irq() and tries to kthread_stop() the dead thread, we plug a pointer to irqaction into task_struct, which is evaluated in do_exit(). When the thread crashes, the do_exit() code marks the thread as DIED (IRQF_THREAD_DIED) in irqaction->flags to prevent further wakeups from the interrupt handler code. On thread creation we get a reference to task_struct so it stays around until the free_irq() code releases it again. The handling of a crashed irq handler thread is slightly racy, but we do not want to have additional locking in the hard interrupt code path. The worst things which can happen are a warning that we tried to wake up a dead task and a hung kthread_stop() in free_irq(). I'm not worried about that at all, as removing a module which had a crashed interrupt handler is critical anyway. The main purpose of this is to keep the system alive even though the affected device no longer works. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- include/linux/interrupt.h | 3 +++ include/linux/sched.h | 1 + kernel/exit.c | 2 ++ kernel/irq/handle.c | 13 +++++++++++-- kernel/irq/manage.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 58 insertions(+), 5 deletions(-) Index: linux-2.6-tip/include/linux/interrupt.h =================================================================== --- linux-2.6-tip.orig/include/linux/interrupt.h +++ linux-2.6-tip/include/linux/interrupt.h @@ -61,6 +61,7 @@ #define IRQF_THREADED 0x00002000 #define IRQF_RUNTHREAD 0x00004000 #define IRQF_WARNED_THREADED 0x00008000 +#define IRQF_THREAD_DIED 0x00010000 typedef irqreturn_t (*irq_handler_t)(int, void *); @@ -114,6 +115,8 @@ static inline int irq_thread_should_run( return test_and_clear_bit(IRQF_RUNTHREAD, &action->flags); } +extern void exit_irq_thread(struct task_struct *tsk); + /* * On lockdep we dont want to enable hardirqs in hardirq * context. 
Use local_irq_enable_in_hardirq() to annotate Index: linux-2.6-tip/include/linux/sched.h =================================================================== --- linux-2.6-tip.orig/include/linux/sched.h +++ linux-2.6-tip/include/linux/sched.h @@ -1301,6 +1301,7 @@ struct task_struct { int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif + struct irqaction *irqaction; }; /* Index: linux-2.6-tip/kernel/exit.c =================================================================== --- linux-2.6-tip.orig/kernel/exit.c +++ linux-2.6-tip/kernel/exit.c @@ -1030,6 +1030,8 @@ NORET_TYPE void do_exit(long code) schedule(); } + exit_irq_thread(tsk); + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against Index: linux-2.6-tip/kernel/irq/handle.c =================================================================== --- linux-2.6-tip.orig/kernel/irq/handle.c +++ linux-2.6-tip/kernel/irq/handle.c @@ -161,8 +161,17 @@ irqreturn_t handle_IRQ_event(unsigned in set_bit(IRQF_WARNED_THREADED, &action->flags); case IRQ_WAKE_THREAD: - set_bit(IRQF_RUNTHREAD, &action->flags); - wake_up_process(action->thread); + /* + * In case the thread crashed and was killed + * we just pretend that we handled the + * interrupt. The quick check handler has + * disabled the device interrupt, so no irq + * storm is lurking. + */ + if (likely(!(action->flags & IRQF_THREAD_DIED))) { + set_bit(IRQF_RUNTHREAD, &action->flags); + wake_up_process(action->thread); + } /* * Set it to handled so the spurious check * does not trigger. 
Index: linux-2.6-tip/kernel/irq/manage.c =================================================================== --- linux-2.6-tip.orig/kernel/irq/manage.c +++ linux-2.6-tip/kernel/irq/manage.c @@ -338,6 +338,8 @@ static int irq_thread(void *data) { struct irqaction *action = data; + current->irqaction = action; + set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { @@ -351,11 +353,36 @@ static int irq_thread(void *data) action->handler(action->irq, action->dev_id); set_current_state(TASK_INTERRUPTIBLE); } + /* + * Clear irqaction. Otherwise exit_irq_thread() would make + * fuzz about an active irq thread going into nirvana. + */ + current->irqaction = NULL; __set_current_state(TASK_RUNNING); return 0; } /* + * Called from do_exit() + */ +void exit_irq_thread(struct task_struct *tsk) +{ + if (!tsk->irqaction) + return; + + printk(KERN_ERR + "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", + tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); + + /* + * Set the THREAD DIED flag to prevent further wakeups of the + * soon to be gone threaded handler. + */ + set_bit(IRQF_THREAD_DIED, &tsk->irqaction->flags); + tsk->irqaction = NULL; +} + +/* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. */ @@ -439,7 +466,12 @@ int setup_irq(unsigned int irq, struct i new->name); if (IS_ERR(t)) return PTR_ERR(t); - + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already gone task_struct. 
+ */ + get_task_struct(t); new->thread = t; } @@ -565,8 +597,14 @@ void free_irq(unsigned int irq, void *de if (desc->chip->release) desc->chip->release(irq, dev_id); #endif - if (action->thread) - kthread_stop(action->thread); + if (action->thread) { + struct task_struct *t = action->thread; + + action->thread = NULL; + if (likely(!(action->flags & IRQF_THREAD_DIED))) + kthread_stop(t); + put_task_struct(t); + } if (!desc->action) { desc->status |= IRQ_DISABLED; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/