[<prev] [next>] [day] [month] [year] [list]
Message-ID: <tencent_2F94F13B31CBCD8A6A5942E7@qq.com>
Date: Fri, 30 Sep 2022 16:31:05 +0800
From: "张鑫城" <zhangxincheng@...ontech.com>
To: "hdegoede" <hdegoede@...hat.com>,
"tglx" <tglx@...utronix.de>
Cc: "linux-kernel" <linux-kernel@...r.kernel.org>,
"maz" <maz@...nel.org>,
"oleksandr" <oleksandr@...alenko.name>,
"bigeasy" <bigeasy@...utronix.de>,
"mark.rutland" <mark.rutland@....com>,
"michael" <michael@...le.cc>
Subject: Re: [PATCH] interrupt: discover and disable very frequent interrupts
Hi,
Thank you very much for your valuable suggestions. I have modified the patch as follows:
Subject: [PATCH] interrupt: discover and disable very frequent interrupts
In some cases, a peripheral triggers its interrupt so frequently that the
CPU is kept busy handling that interrupt, which eventually causes RCU to
report an rcu_sched self-detected stall on that CPU:
[ 838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[ 838.137189] rcu: 0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[ 838.146912] rcu: (t=195015 jiffies g=6773 q=0)
[ 838.151516] Task dump for CPU 0:
[ 838.154730] systemd-sleep R running task 0 3445 1 0x0000000a
Signed-off-by: zhangxincheng <zhangxincheng@...ontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
---
include/linux/irqdesc.h | 2 ++
kernel/irq/spurious.c | 36 +++++++++++++++++++++++++++++-------
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
int parent_irq;
struct module *owner;
const char *name;
+ u32 gap_count;
+ u64 gap_time;
} ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..75bd0088446a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -188,19 +188,21 @@ static inline int bad_action_ret(irqreturn_t action_ret)
*
* (The other 100-of-100,000 interrupts may have been a correctly
* functioning device sharing an IRQ with the failing one)
+ *
+ * Some faulty hardware triggers interrupts so frequently that the CPU
+ * ends up processing hardware interrupts all the time. When such an
+ * interrupt is detected, it should be disabled.
*/
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, const char *msg)
{
unsigned int irq = irq_desc_get_irq(desc);
struct irqaction *action;
unsigned long flags;
if (bad_action_ret(action_ret)) {
- printk(KERN_ERR "irq event %d: bogus return value %x\n",
- irq, action_ret);
+ printk(msg, irq, action_ret);
} else {
- printk(KERN_ERR "irq %d: nobody cared (try booting with "
- "the \"irqpoll\" option)\n", irq);
+ printk(msg, irq);
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -228,7 +230,7 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
if (count > 0) {
count--;
- __report_bad_irq(desc, action_ret);
+ __report_bad_irq(desc, action_ret, KERN_ERR "irq event %d: bogus return value %x\n");
}
}
@@ -282,6 +284,25 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
return;
}
+ if((desc->gap_count & 0xffff0000) == 0)
+ desc->gap_time = get_jiffies_64();
+
+ desc->gap_count ++;
+
+ if((desc->gap_count & 0x0000ffff) >= 2000) {
+ if((get_jiffies_64() - desc->gap_time) < HZ) {
+ desc->gap_count += 0x00010000;
+ desc->gap_count &= 0xffff0000;
+ } else {
+ desc->gap_count = 0;
+ }
+
+ if((desc->gap_count >> 16) > 30) {
+ __report_bad_irq(desc, action_ret, KERN_ERR "irq %d: triggered too frequently\n");
+ irq_disable(desc);
+ }
+ }
+
/*
* We cannot call note_interrupt from the threaded handler
* because we need to look at the compound of all handlers
@@ -416,7 +437,8 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
/*
* The interrupt is stuck
*/
- __report_bad_irq(desc, action_ret);
+ __report_bad_irq(desc, action_ret, KERN_ERR "irq %d: nobody cared (try booting"
+ "with the \"irqpoll\" option)\n");
/*
* Now kill the IRQ
*/
--
2.20.1
Regards,
Zhang Xincheng
Powered by blists - more mailing lists