lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1323643735-1999-1-git-send-email-vincent.mc.li@gmail.com>
Date:	Sun, 11 Dec 2011 14:48:55 -0800
From:	Vincent Li <vincent.mc.li@...il.com>
To:	Ingo Molnar <mingo@...e.hu>
Cc:	Don Zickus <dzickus@...hat.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Mandeep Singh Baines <msb@...omium.org>,
	linux-kernel@...r.kernel.org, Vincent Li <vincent.mc.li@...il.com>
Subject: [PATCH] Softlockup (out of cpu) killer

The kernel has an out-of-memory (OOM) killer, so why not add an out-of-CPU (OOC) killer?
I tested the following patch by running a user-space CPU-hogging process, and the softlockup
detector killed the process successfully.

 A softlockup can be caused by a user-space process hogging the CPU. Add a softlockup_kill
 kernel config option to allow the kernel to kill the CPU-hogging user-space process. This
 feature is useful for high-availability systems that have uptime guarantees and where a
 softlockup must be resolved ASAP.

echo 1 > /proc/sys/kernel/softlockup_kill to enable cpu hog process killer
echo 0 > /proc/sys/kernel/softlockup_kill to disable cpu hog process killer

Signed-off-by: Vincent Li <vincent.mc.li@...il.com>
---
 Documentation/kernel-parameters.txt |    4 ++++
 include/linux/sched.h               |    1 +
 kernel/sysctl.c                     |    9 +++++++++
 kernel/watchdog.c                   |   18 ++++++++++++++++++
 lib/Kconfig.debug                   |   21 +++++++++++++++++++++
 5 files changed, 53 insertions(+), 0 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 81c287f..1609387 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2418,6 +2418,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			[KNL] Should the soft-lockup detector generate panics.
 			Format: <integer>
 
+	softlockup_kill=
+			[KNL] Should the soft-lockup detector kill cpu hog process.
+			Format: <integer>
+
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9..4783fac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -315,6 +315,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 				  void __user *buffer,
 				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
+extern unsigned int  softlockup_kill;
 void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae27196..e79ea9c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -770,6 +770,15 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 	{
+		.procname	= "softlockup_kill",
+		.data		= &softlockup_kill,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
 		.procname       = "nmi_watchdog",
 		.data           = &watchdog_enabled,
 		.maxlen         = sizeof (int),
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1d7bca7..5832a90 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -75,6 +75,17 @@ static int __init softlockup_panic_setup(char *str)
 }
 __setup("softlockup_panic=", softlockup_panic_setup);
 
+unsigned int __read_mostly softlockup_kill =
+			CONFIG_BOOTPARAM_SOFTLOCKUP_KILL_VALUE;
+
+static int __init softlockup_kill_setup(char *str)
+{
+	softlockup_kill = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("softlockup_kill=", softlockup_kill_setup);
+
 static int __init nowatchdog_setup(char *str)
 {
 	watchdog_enabled = 0;
@@ -306,6 +317,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
 
+		if (softlockup_kill) {
+			printk(KERN_ERR "Kill softlockup process [%s:%d] on CPU#%d\n",
+				current->comm, task_pid_nr(current),
+				smp_processor_id());
+			force_sig(SIGKILL, current);
+		}
+
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 82928f5..e4afc98 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -224,6 +224,27 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
 
+config BOOTPARAM_SOFTLOCKUP_KILL
+	bool "Kill (cpu hog process) On Soft Lockups"
+	depends on LOCKUP_DETECTOR
+	help
+	  Say Y here to enable the kernel to kill cpu hog process on
+	  "soft lockups", which are bugs that cause the kernel to
+	  loop in kernel mode for more than 60 seconds, without giving
+	  other tasks a chance to run.
+
+	  This feature is useful for high-availability systems that
+	  have uptime guarantees and where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_KILL_VALUE
+	int
+	depends on LOCKUP_DETECTOR
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_KILL
+	default 1 if BOOTPARAM_SOFTLOCKUP_KILL
+
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ