Message-Id: <20190827085344.30799-2-ming.lei@redhat.com>
Date:   Tue, 27 Aug 2019 16:53:41 +0800
From:   Ming Lei <ming.lei@...hat.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     linux-kernel@...r.kernel.org, Ming Lei <ming.lei@...hat.com>,
        Long Li <longli@...rosoft.com>, Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Keith Busch <keith.busch@...el.com>, Jens Axboe <axboe@...com>,
        Christoph Hellwig <hch@....de>,
        Sagi Grimberg <sagi@...mberg.me>,
        John Garry <john.garry@...wei.com>,
        Hannes Reinecke <hare@...e.com>,
        linux-nvme@...ts.infradead.org, linux-scsi@...r.kernel.org
Subject: [PATCH 1/4] softirq: implement IRQ flood detection mechanism

For some high-performance I/O devices, interrupts can arrive very
frequently, while completing each I/O request takes a non-trivial amount
of time. On some devices (SCSI or NVMe in particular), I/O requests may
be submitted concurrently from multiple CPU cores, yet completion is
handled on only one of those submission CPUs.

An IRQ flood can then be triggered easily, resulting in CPU lockup.

Implement a simple, generic per-CPU IRQ flood detection mechanism. It
decides whether an IRQ flood is in progress from the CPU's average
interrupt interval, which is computed with an Exponentially Weighted
Moving Average (EWMA). The average interval, in nanoseconds, is also
exposed read-only through the per-CPU sysfs attribute irq_interval.
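
As a rough illustration of how the EWMA behaves, here is a minimal
userspace sketch. The 127/128 and 1/128 weights mirror the patch; the
initial average and the burst length are invented for the example:

#include <stdio.h>
#include <stdint.h>

/* EWMA weights as in the patch: avg = (avg * 127 + delta * 1) / 128 */
#define EWMA_WEIGHT		128
#define EWMA_PREV_FACTOR	127
#define EWMA_CURR_FACTOR	(EWMA_WEIGHT - EWMA_PREV_FACTOR)

static uint64_t ewma_update(uint64_t avg, uint64_t delta_ns)
{
	return (avg * EWMA_PREV_FACTOR + delta_ns * EWMA_CURR_FACTOR) /
		EWMA_WEIGHT;
}

int main(void)
{
	uint64_t avg = 100000;	/* assume a 100us average interval to start */
	int i;

	/* feed a burst of 1us intervals; the average decays toward 1us */
	for (i = 0; i < 1024; i++)
		avg = ewma_update(avg, 1000);

	printf("avg after burst: %llu ns\n", (unsigned long long)avg);
	return 0;
}

With a previous-value weight of 127/128, the average reacts with a time
constant of roughly 128 interrupts, so a brief spike in interrupt rate
does not immediately drag the average under the 8us threshold that
irq_flood_detected() checks against.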

Cc: Long Li <longli@...rosoft.com>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Keith Busch <keith.busch@...el.com>
Cc: Jens Axboe <axboe@...com>
Cc: Christoph Hellwig <hch@....de>
Cc: Sagi Grimberg <sagi@...mberg.me>
Cc: John Garry <john.garry@...wei.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Hannes Reinecke <hare@...e.com>
Cc: linux-nvme@...ts.infradead.org
Cc: linux-scsi@...r.kernel.org
Signed-off-by: Ming Lei <ming.lei@...hat.com>
---
 drivers/base/cpu.c      | 25 ++++++++++++++++++++++
 include/linux/hardirq.h |  2 ++
 kernel/softirq.c        | 46 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index cc37511de866..7277d1aa0906 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -20,6 +20,7 @@
 #include <linux/tick.h>
 #include <linux/pm_qos.h>
 #include <linux/sched/isolation.h>
+#include <linux/hardirq.h>
 
 #include "base.h"
 
@@ -183,10 +184,33 @@ static struct attribute_group crash_note_cpu_attr_group = {
 };
 #endif
 
+static ssize_t show_irq_interval(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+
+	cpunum = cpu->dev.id;
+
+	rc = sprintf(buf, "%llu\n", irq_get_avg_interval(cpunum));
+	return rc;
+}
+
+static DEVICE_ATTR(irq_interval, 0400, show_irq_interval, NULL);
+static struct attribute *irq_interval_cpu_attrs[] = {
+	&dev_attr_irq_interval.attr,
+	NULL
+};
+static struct attribute_group irq_interval_cpu_attr_group = {
+	.attrs = irq_interval_cpu_attrs,
+};
+
 static const struct attribute_group *common_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+	&irq_interval_cpu_attr_group,
 	NULL
 };
 
@@ -194,6 +218,7 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+	&irq_interval_cpu_attr_group,
 	NULL
 };
 
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index da0af631ded5..fd394060ddb3 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -8,6 +8,8 @@
 #include <linux/vtime.h>
 #include <asm/hardirq.h>
 
+extern u64 irq_get_avg_interval(int cpu);
+extern bool irq_flood_detected(void);
 
 extern void synchronize_irq(unsigned int irq);
 extern bool synchronize_hardirq(unsigned int irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0427a86743a4..96e01669a2e0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,7 @@
 #include <linux/smpboot.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/sched/clock.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
@@ -52,6 +53,12 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 #endif
 
+struct irq_interval {
+	u64                     last_irq_end;
+	u64                     avg;
+};
+DEFINE_PER_CPU(struct irq_interval, avg_irq_interval);
+
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
@@ -339,6 +346,41 @@ asmlinkage __visible void do_softirq(void)
 	local_irq_restore(flags);
 }
 
+/*
+ * Update the average irq interval with an Exponentially Weighted Moving
+ * Average (EWMA)
+ */
+static void irq_update_interval(void)
+{
+#define IRQ_INTERVAL_EWMA_WEIGHT	128
+#define IRQ_INTERVAL_EWMA_PREV_FACTOR	127
+#define IRQ_INTERVAL_EWMA_CURR_FACTOR	(IRQ_INTERVAL_EWMA_WEIGHT - \
+		IRQ_INTERVAL_EWMA_PREV_FACTOR)
+
+	int cpu = raw_smp_processor_id();
+	struct irq_interval *inter = per_cpu_ptr(&avg_irq_interval, cpu);
+	u64 delta = sched_clock_cpu(cpu) - inter->last_irq_end;
+
+	inter->avg = (inter->avg * IRQ_INTERVAL_EWMA_PREV_FACTOR +
+		delta * IRQ_INTERVAL_EWMA_CURR_FACTOR) /
+		IRQ_INTERVAL_EWMA_WEIGHT;
+}
+
+u64 irq_get_avg_interval(int cpu)
+{
+	return per_cpu_ptr(&avg_irq_interval, cpu)->avg;
+}
+
+/*
+ * If the average CPU irq interval is 8us or less, we consider an
+ * interrupt flood to be in progress on this CPU
+ */
+bool irq_flood_detected(void)
+{
+#define  IRQ_FLOOD_THRESHOLD_NS	8000
+	return raw_cpu_ptr(&avg_irq_interval)->avg <= IRQ_FLOOD_THRESHOLD_NS;
+}
+
 /*
  * Enter an interrupt context.
  */
@@ -356,6 +398,7 @@ void irq_enter(void)
 	}
 
 	__irq_enter();
+	irq_update_interval();
 }
 
 static inline void invoke_softirq(void)
@@ -402,6 +445,8 @@ static inline void tick_irq_exit(void)
  */
 void irq_exit(void)
 {
+	struct irq_interval *inter = raw_cpu_ptr(&avg_irq_interval);
+
 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
 	local_irq_disable();
 #else
@@ -413,6 +458,7 @@ void irq_exit(void)
 		invoke_softirq();
 
 	tick_irq_exit();
+	inter->last_irq_end = sched_clock_cpu(smp_processor_id());
 	rcu_irq_exit();
 	trace_hardirq_exit(); /* must be last! */
 }
-- 
2.20.1
