[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220930064042.14564-1-zhangxincheng@uniontech.com>
Date: Fri, 30 Sep 2022 14:40:42 +0800
From: Zhang Xincheng <zhangxincheng@...ontech.com>
To: tglx@...utronix.de
Cc: linux-kernel@...r.kernel.org, maz@...nel.org,
oleksandr@...alenko.name, hdegoede@...hat.com,
bigeasy@...utronix.de, mark.rutland@....com, michael@...le.cc,
zhangxincheng <zhangxincheng@...ontech.com>
Subject: [PATCH] interrupt: discover and disable very frequent interrupts
From: zhangxincheng <zhangxincheng@...ontech.com>
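In some cases a peripheral's interrupt is triggered so frequently that
the CPU spends all of its time servicing that interrupt, and RCU
eventually reports an rcu_sched self-detected stall on that CPU.
To catch this, count the interrupts of each descriptor in
note_interrupt(): when more than 30 consecutive batches of 2000
interrupts each complete within HZ jiffies of the recorded start time,
log the installed handlers and disable the interrupt line.
Example of the stall being addressed: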
[ 838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[ 838.137189] rcu: 0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[ 838.146912] rcu: (t=195015 jiffies g=6773 q=0)
[ 838.151516] Task dump for CPU 0:
[ 838.154730] systemd-sleep R running task 0 3445 1 0x0000000a
Signed-off-by: zhangxincheng <zhangxincheng@...ontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
Signed-off-by: zhangxincheng <zhangxincheng@...ontech.com>
---
include/linux/irqdesc.h | 2 ++
kernel/irq/spurious.c | 52 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
int parent_irq;
struct module *owner;
const char *name;
+ u32 gap_count;
+ u64 gap_time;
} ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..b7162a10d92c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -222,6 +222,38 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
+/*
+ * Report an interrupt line that fires far too often.
+ *
+ * Some bad hardware will trigger interrupts very frequently, which keeps
+ * the CPU processing hardware interrupts all the time. This helper only
+ * logs the event: it prints the irq number, a stack trace and every
+ * handler (plus the threaded handler, if any) installed on @desc.
+ * Disabling the line is left to the caller.
+ *
+ * @desc:       descriptor of the misbehaving interrupt
+ * @action_ret: handler return value passed in by the caller; currently
+ *              unused, kept so the signature mirrors __report_bad_irq()
+ */
+static void __report_so_frequently_irq(struct irq_desc *desc, irqreturn_t action_ret)
+{
+	unsigned int irq = irq_desc_get_irq(desc);
+	struct irqaction *action;
+	unsigned long flags;
+
+	/* irq is unsigned, so print it with %u rather than %d. */
+	printk(KERN_ERR "irq %u: triggered too frequently\n", irq);
+	dump_stack();
+	printk(KERN_ERR "handlers:\n");
+
+	/*
+	 * We need to take desc->lock here. note_interrupt() is called
+	 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
+	 * with something else removing an action. It's ok to take
+	 * desc->lock here. See synchronize_irq().
+	 */
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	for_each_action_of_desc(desc, action) {
+		/*
+		 * %ps resolves the function pointer to its symbol name;
+		 * the old %pf specifier no longer does that.
+		 */
+		printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler);
+		if (action->thread_fn)
+			printk(KERN_CONT " threaded [<%p>] %ps",
+			       action->thread_fn, action->thread_fn);
+		printk(KERN_CONT "\n");
+	}
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+}
+
static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
static int count = 100;
@@ -273,6 +305,26 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
unsigned int irq;
+ if((desc->gap_count & 0xffff0000) == 0)
+ desc->gap_time = get_jiffies_64();
+
+ desc->gap_count ++;
+
+ if((desc->gap_count & 0x0000ffff) >= 2000) {
+ if((get_jiffies_64() - desc->gap_time) < HZ) {
+ desc->gap_count += 0x00010000;
+ desc->gap_count &= 0xffff0000;
+ } else {
+ desc->gap_count = 0;
+ }
+
+ if((desc->gap_count >> 16) > 30) {
+ __report_so_frequently_irq(desc, action_ret);
+ irq_disable(desc);
+ }
+ }
+
+
if (desc->istate & IRQS_POLL_INPROGRESS ||
irq_settings_is_polled(desc))
return;
--
2.20.1
Powered by blists - more mailing lists