[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <DM5PR21MB0748278B86FB5103AF6E8A37CEA20@DM5PR21MB0748.namprd21.prod.outlook.com>
Date: Thu, 29 Aug 2019 06:15:00 +0000
From: Long Li <longli@...rosoft.com>
To: Ming Lei <ming.lei@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>
CC: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Keith Busch <keith.busch@...el.com>, Jens Axboe <axboe@...com>,
Christoph Hellwig <hch@....de>,
Sagi Grimberg <sagi@...mberg.me>,
John Garry <john.garry@...wei.com>,
Hannes Reinecke <hare@...e.com>,
"linux-nvme@...ts.infradead.org" <linux-nvme@...ts.infradead.org>,
"linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>
Subject: RE: [PATCH 1/4] softirq: implement IRQ flood detection mechanism
>>>For some high performance IO devices, interrupt may come very frequently,
>>>meantime IO request completion may take a bit time. Especially on some
>>>devices(SCSI or NVMe), IO requests can be submitted concurrently from
>>>multiple CPU cores, however IO completion is only done on one of these
>>>submission CPU cores.
>>>
>>>Then IRQ flood can be easily triggered, and CPU lockup.
>>>
>>>Implement one simple generic CPU IRQ flood detection mechanism. This
>>>mechanism uses the CPU average interrupt interval to evaluate if IRQ flood
>>>is triggered. The Exponential Weighted Moving Average(EWMA) is used to
>>>compute CPU average interrupt interval.
>>>
>>>Cc: Long Li <longli@...rosoft.com>
>>>Cc: Ingo Molnar <mingo@...hat.com>,
>>>Cc: Peter Zijlstra <peterz@...radead.org>
>>>Cc: Keith Busch <keith.busch@...el.com>
>>>Cc: Jens Axboe <axboe@...com>
>>>Cc: Christoph Hellwig <hch@....de>
>>>Cc: Sagi Grimberg <sagi@...mberg.me>
>>>Cc: John Garry <john.garry@...wei.com>
>>>Cc: Thomas Gleixner <tglx@...utronix.de>
>>>Cc: Hannes Reinecke <hare@...e.com>
>>>Cc: linux-nvme@...ts.infradead.org
>>>Cc: linux-scsi@...r.kernel.org
>>>Signed-off-by: Ming Lei <ming.lei@...hat.com>
>>>---
>>> drivers/base/cpu.c | 25 ++++++++++++++++++++++
>>> include/linux/hardirq.h | 2 ++
>>> kernel/softirq.c | 46
>>>+++++++++++++++++++++++++++++++++++++++++
>>> 3 files changed, 73 insertions(+)
>>>
>>>diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index
>>>cc37511de866..7277d1aa0906 100644
>>>--- a/drivers/base/cpu.c
>>>+++ b/drivers/base/cpu.c
>>>@@ -20,6 +20,7 @@
>>> #include <linux/tick.h>
>>> #include <linux/pm_qos.h>
>>> #include <linux/sched/isolation.h>
>>>+#include <linux/hardirq.h>
>>>
>>> #include "base.h"
>>>
>>>@@ -183,10 +184,33 @@ static struct attribute_group
>>>crash_note_cpu_attr_group = { }; #endif
>>>
>>>+static ssize_t show_irq_interval(struct device *dev,
>>>+ struct device_attribute *attr, char *buf) {
>>>+ struct cpu *cpu = container_of(dev, struct cpu, dev);
>>>+ ssize_t rc;
>>>+ int cpunum;
>>>+
>>>+ cpunum = cpu->dev.id;
>>>+
>>>+ rc = sprintf(buf, "%llu\n", irq_get_avg_interval(cpunum));
>>>+ return rc;
>>>+}
>>>+
>>>+static DEVICE_ATTR(irq_interval, 0400, show_irq_interval, NULL); static
>>>+struct attribute *irq_interval_cpu_attrs[] = {
>>>+ &dev_attr_irq_interval.attr,
>>>+ NULL
>>>+};
>>>+static struct attribute_group irq_interval_cpu_attr_group = {
>>>+ .attrs = irq_interval_cpu_attrs,
>>>+};
>>>+
>>> static const struct attribute_group *common_cpu_attr_groups[] = { #ifdef
>>>CONFIG_KEXEC
>>> &crash_note_cpu_attr_group,
>>> #endif
>>>+ &irq_interval_cpu_attr_group,
>>> NULL
>>> };
>>>
>>>@@ -194,6 +218,7 @@ static const struct attribute_group
>>>*hotplugable_cpu_attr_groups[] = { #ifdef CONFIG_KEXEC
>>> &crash_note_cpu_attr_group,
>>> #endif
>>>+ &irq_interval_cpu_attr_group,
>>> NULL
>>> };
>>>
>>>diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index
>>>da0af631ded5..fd394060ddb3 100644
>>>--- a/include/linux/hardirq.h
>>>+++ b/include/linux/hardirq.h
>>>@@ -8,6 +8,8 @@
>>> #include <linux/vtime.h>
>>> #include <asm/hardirq.h>
>>>
>>>+extern u64 irq_get_avg_interval(int cpu); extern bool
>>>+irq_flood_detected(void);
>>>
>>> extern void synchronize_irq(unsigned int irq); extern bool
>>>synchronize_hardirq(unsigned int irq); diff --git a/kernel/softirq.c
>>>b/kernel/softirq.c index 0427a86743a4..96e01669a2e0 100644
>>>--- a/kernel/softirq.c
>>>+++ b/kernel/softirq.c
>>>@@ -25,6 +25,7 @@
>>> #include <linux/smpboot.h>
>>> #include <linux/tick.h>
>>> #include <linux/irq.h>
>>>+#include <linux/sched/clock.h>
>>>
>>> #define CREATE_TRACE_POINTS
>>> #include <trace/events/irq.h>
>>>@@ -52,6 +53,12 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
>>>EXPORT_PER_CPU_SYMBOL(irq_stat); #endif
>>>
>>>+struct irq_interval {
>>>+ u64 last_irq_end;
>>>+ u64 avg;
>>>+};
>>>+DEFINE_PER_CPU(struct irq_interval, avg_irq_interval);
>>>+
>>> static struct softirq_action softirq_vec[NR_SOFTIRQS]
>>>__cacheline_aligned_in_smp;
>>>
>>> DEFINE_PER_CPU(struct task_struct *, ksoftirqd); @@ -339,6 +346,41 @@
>>>asmlinkage __visible void do_softirq(void)
>>> local_irq_restore(flags);
>>> }
>>>
>>>+/*
>>>+ * Update average irq interval with the Exponential Weighted Moving
>>>+ * Average(EWMA)
>>>+ */
>>>+static void irq_update_interval(void)
>>>+{
>>>+#define IRQ_INTERVAL_EWMA_WEIGHT 128
>>>+#define IRQ_INTERVAL_EWMA_PREV_FACTOR 127
>>>+#define IRQ_INTERVAL_EWMA_CURR_FACTOR
>>> (IRQ_INTERVAL_EWMA_WEIGHT - \
>>>+ IRQ_INTERVAL_EWMA_PREV_FACTOR)
>>>+
>>>+ int cpu = raw_smp_processor_id();
>>>+ struct irq_interval *inter = per_cpu_ptr(&avg_irq_interval, cpu);
>>>+ u64 delta = sched_clock_cpu(cpu) - inter->last_irq_end;
>>>+
>>>+ inter->avg = (inter->avg * IRQ_INTERVAL_EWMA_PREV_FACTOR +
inter->avg starts at 0, so the first interrupts after boot will immediately be classified as a flood. Maybe seed it with a bigger initial value, e.g. IRQ_FLOOD_THRESHOLD_NS?
>>>+ delta * IRQ_INTERVAL_EWMA_CURR_FACTOR) /
>>>+ IRQ_INTERVAL_EWMA_WEIGHT;
>>>+}
>>>+
>>>+u64 irq_get_avg_interval(int cpu)
>>>+{
>>>+ return per_cpu_ptr(&avg_irq_interval, cpu)->avg; }
>>>+
>>>+/*
>>>+ * If the average CPU irq interval is less than 8us, we think interrupt
>>>+ * flood is detected on this CPU
>>>+ */
>>>+bool irq_flood_detected(void)
>>>+{
>>>+#define IRQ_FLOOD_THRESHOLD_NS 8000
>>>+ return raw_cpu_ptr(&avg_irq_interval)->avg <=
>>>IRQ_FLOOD_THRESHOLD_NS;
>>>+}
>>>+
>>> /*
>>> * Enter an interrupt context.
>>> */
>>>@@ -356,6 +398,7 @@ void irq_enter(void)
>>> }
>>>
>>> __irq_enter();
>>>+ irq_update_interval();
>>> }
>>>
>>> static inline void invoke_softirq(void) @@ -402,6 +445,8 @@ static inline
>>>void tick_irq_exit(void)
>>> */
>>> void irq_exit(void)
>>> {
>>>+ struct irq_interval *inter = raw_cpu_ptr(&avg_irq_interval);
>>>+
>>> #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
>>> local_irq_disable();
>>> #else
>>>@@ -413,6 +458,7 @@ void irq_exit(void)
>>> invoke_softirq();
>>>
>>> tick_irq_exit();
>>>+ inter->last_irq_end = sched_clock_cpu(smp_processor_id());
>>> rcu_irq_exit();
>>> trace_hardirq_exit(); /* must be last! */ }
>>>--
>>>2.20.1
Powered by blists - more mailing lists