Message-ID: <48297FD4.1060508@miraclelinux.com>
Date:	Tue, 13 May 2008 20:47:32 +0900
From:	Hirofumi Nakagawa <hnakagawa@...aclelinux.com>
To:	linux-kernel@...r.kernel.org
CC:	akpm@...ux-foundation.org
Subject: [RFC][PATCH 1/1] MAZE: Mazed processes monitor

MAZE monitors the CPU consumption (utime + stime) of registered processes
using preempt notifiers and a per-CPU timer, and sends configurable signals
when a process exceeds its soft or hard limit.  Processes are registered and
their limits configured through /proc/maze/entries.

This patch is against linux-2.6.25-mm1 and has been tested on x86_64 hardware.
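
For illustration only (not part of the patch): a minimal user-space sketch of
the /proc/maze/entries interface.  The write handler parses one line of the
form "<pid> <soft_limit> <hard_limit> <soft_signal> <hard_signal>"; the limits
are in cputime units (accumulated utime + stime), writing requires
CAP_SYS_ADMIN, and the PID, limit values and signal numbers below are
arbitrary examples.

	#include <stdio.h>

	int main(void)
	{
		/* Must be run with CAP_SYS_ADMIN (e.g. as root). */
		FILE *fp = fopen("/proc/maze/entries", "w");

		if (!fp) {
			perror("fopen /proc/maze/entries");
			return 1;
		}

		/* pid 1234: send SIGXCPU (24) at count 100, SIGKILL (9) at 200 */
		fprintf(fp, "1234 100 200 24 9\n");
		fclose(fp);
		return 0;
	}

Reading the same file back lists every registered entry together with its
current maze count, limits and signals.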

Signed-off-by: Hirofumi Nakagawa <hnakagawa@...aclelinux.com>
---
 include/linux/maze.h  |   92 +++++++++
 include/linux/sched.h |    3
 init/Kconfig          |   10 +
 init/main.c           |    2
 kernel/Makefile       |    1
 kernel/exit.c         |    2
 kernel/fork.c         |    2
 kernel/maze.c         |  466 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c        |    3
 9 files changed, 581 insertions(+)

--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-mm1-maze/include/linux/maze.h	2008-05-13 17:52:23.000000000 +0900
@@ -0,0 +1,92 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@...aclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#ifndef _LINUX_MAZE_H
+#define _LINUX_MAZE_H
+
+#ifdef __KERNEL__
+
+#include <linux/sched.h>
+#include <linux/list.h>
+
+struct maze_context {
+	struct task_struct *task;
+	int pid;
+
+	/* The stime of the task when the maze count was last reset */
+	cputime_t reset_stime;
+	/* The utime of the task when the maze count was last reset */
+	cputime_t reset_utime;
+
+	/* The soft limit of the maze count */
+	unsigned long soft_limit;
+	/* The hard limit of the maze count */
+	unsigned long hard_limit;
+
+	/* The signal sent when the maze count reaches the soft limit */
+	int soft_signal;
+	/* The signal sent when the maze count reaches the hard limit */
+	int hard_signal;
+
+#define MAZE_SENT_SOFT_SIG  1
+#define MAZE_SENT_HARD_SIG  2
+	/* Flags recording which signals have already been sent */
+	unsigned long flags;
+
+	/* Set to 1 while this maze_context is on a per-CPU queue */
+	int enqueue;
+	/* The CPU whose queue this maze_context is on */
+	int enqueue_cpu;
+
+	/* Set to 1 once the preempt_notifier has been registered */
+	atomic_t registered_notifier;
+
+	struct preempt_notifier notifier;
+
+	/* Linked into the per-CPU maze_queue */
+	struct list_head queue;
+	/* Linked into the global maze_list */
+	struct list_head list;
+};
+
+#ifdef CONFIG_MAZE
+
+/* This function is called from start_kernel() */
+extern void maze_init_early(void);
+
+/* This function is called from do_exit() */
+extern void maze_exit(struct task_struct *task);
+
+/* This function is called from copy_process() */
+extern void maze_fork(struct task_struct *task);
+
+/* This function is called from sys_sched_yield() */
+extern void maze_sched_yield(struct task_struct *task);
+
+#else  /* !CONFIG_MAZE */
+
+static inline void maze_init_early(void)
+{
+}
+
+static inline void maze_exit(struct task_struct *task)
+{
+}
+
+static inline void maze_fork(struct task_struct *task)
+{
+}
+
+static inline void maze_sched_yield(struct task_struct *task)
+{
+}
+#endif /* CONFIG_MAZE */
+
+#endif /* __KERNEL__    */
+#endif /* _LINUX_MAZE_H */
--- linux-2.6.25-mm1-maze.orig/include/linux/sched.h	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/include/linux/sched.h	2008-05-13 17:52:23.000000000 +0900
@@ -1254,6 +1254,9 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock */
 	struct list_head cg_list;
 #endif
+#ifdef CONFIG_MAZE
+	struct maze_context *maze_context;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
--- linux-2.6.25-mm1-maze.orig/init/Kconfig	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/init/Kconfig	2008-05-13 17:52:23.000000000 +0900
@@ -371,6 +371,16 @@ config RESOURCE_COUNTERS
           infrastructure that works with cgroups
 	depends on CGROUPS

+config MAZE
+	bool "MAZE monitor support"
+	select PREEMPT_NOTIFIERS
+	help
+	  MAZE monitors "mazed" (runaway) processes that consume excessive CPU cycles.
+	  This is a CGL (Carrier Grade Linux) requirement (AVL.14.0). MAZE detects such
+	  processes and sends the specified signals to them so they can be terminated.
+
+	  Say N if unsure.
+
 config MM_OWNER
 	bool

--- linux-2.6.25-mm1-maze.orig/init/main.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/init/main.c	2008-05-13 17:52:23.000000000 +0900
@@ -61,6 +61,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/maze.h>

 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -543,6 +544,7 @@ asmlinkage void __init start_kernel(void
 	boot_init_stack_canary();
 	debug_objects_early_init();
 	cgroup_init_early();
+	maze_init_early();

 	local_irq_disable();
 	early_boot_irqs_off();
--- linux-2.6.25-mm1-maze.orig/kernel/Makefile	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/Makefile	2008-05-13 17:52:23.000000000 +0900
@@ -80,6 +80,7 @@ obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FTRACE) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_MAZE) += maze.o

 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@...uxcare.com.au>, the -fno-omit-frame-pointer is
--- linux-2.6.25-mm1-maze.orig/kernel/exit.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/exit.c	2008-05-13 17:52:23.000000000 +0900
@@ -44,6 +44,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/maze.h>

 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1058,6 +1059,7 @@ NORET_TYPE void do_exit(long code)
 	__exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
+	maze_exit(tsk);
 	cgroup_exit(tsk, 1);
 	exit_keys(tsk);

--- linux-2.6.25-mm1-maze.orig/kernel/fork.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/fork.c	2008-05-13 17:52:23.000000000 +0900
@@ -53,6 +53,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <linux/maze.h>

 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1363,6 +1364,7 @@ static struct task_struct *copy_process(
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	maze_fork(p);
 	return p;

 bad_fork_free_pid:
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-mm1-maze/kernel/maze.c	2008-05-13 20:07:55.000000000 +0900
@@ -0,0 +1,466 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@...aclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/capability.h>
+#include <linux/maze.h>
+
+#define ERRPRINT(str, ...) printk(KERN_ERR "ERROR [%s] " str, __func__, ##__VA_ARGS__)
+
+/* The root directory of MAZE in procfs */
+static struct proc_dir_entry *maze_proc_dir;
+
+static int maze_initialized;
+
+static struct list_head maze_list;
+static spinlock_t maze_list_lock;
+
+static int maze_timer_interval = 10;
+
+DEFINE_PER_CPU(struct list_head, maze_queue);
+DEFINE_PER_CPU(spinlock_t, maze_queue_lock);
+
+DEFINE_PER_CPU(struct timer_list, maze_timer);
+
+static inline void reset_maze_count(struct maze_context *context)
+{
+	context->reset_utime = context->task->utime;
+	context->reset_stime = context->task->stime;
+	context->flags = 0;
+}
+
+static inline cputime_t get_maze_count(struct maze_context *context)
+{
+	return (context->task->utime - context->reset_utime) +
+		(context->task->stime - context->reset_stime);
+}
+
+/*
+ * This function is called from start_kernel()
+ */
+void maze_init_early(void)
+{
+	init_task.maze_context = NULL;
+}
+
+/*
+ * This function is called from do_exit()
+ */
+void maze_exit(struct task_struct *task)
+{
+	unsigned long flags;
+
+	task_lock(task);
+	if (task->maze_context) {
+		spin_lock_irqsave(&per_cpu(maze_queue_lock,
+								   task->maze_context->enqueue_cpu), flags);
+		if (task->maze_context->enqueue)
+			list_del(&task->maze_context->queue);
+		spin_unlock_irqrestore(&per_cpu(maze_queue_lock,
+										task->maze_context->enqueue_cpu), flags);
+
+		spin_lock(&maze_list_lock);
+		list_del(&task->maze_context->list);
+		spin_unlock(&maze_list_lock);
+
+		kfree(task->maze_context);
+		task->maze_context = NULL;
+	}
+	task_unlock(task);
+}
+
+static inline void copy_limit_and_signal(struct maze_context *to,
+										 struct maze_context *from)
+{
+	to->soft_limit = from->soft_limit;
+	to->hard_limit = from->hard_limit;
+	to->soft_signal = from->soft_signal;
+	to->hard_signal = from->hard_signal;
+}
+
+/*
+ * Must be called under task_lock().
+ */
+static inline void enqueue(struct maze_context *context)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&__get_cpu_var(maze_queue_lock), flags);
+	if (!context->enqueue) {
+		list_add(&context->queue, &__get_cpu_var(maze_queue));
+		context->enqueue_cpu = smp_processor_id();
+		context->enqueue = 1;
+	}
+	spin_unlock_irqrestore(&__get_cpu_var(maze_queue_lock), flags);
+}
+
+static void sched_in_event(struct preempt_notifier *notifier, int cpu)
+{
+	task_lock(current);
+	if (current->maze_context) {
+		if (current->state != TASK_RUNNING)
+			reset_maze_count(current->maze_context);
+		enqueue(current->maze_context);
+	}
+	task_unlock(current);
+}
+
+static void sched_out_event(struct preempt_notifier *notifier,
+							struct task_struct *next)
+{
+	task_lock(next);
+	if (next->maze_context)
+		enqueue(next->maze_context);
+	task_unlock(next);
+}
+
+static struct preempt_ops preempt_ops = {
+	.sched_in = sched_in_event,
+	.sched_out = sched_out_event,
+};
+
+/*
+ * This function is called from copy_process()
+ */
+void maze_fork(struct task_struct *task)
+{
+	task->maze_context = NULL;
+
+	if (!current->maze_context)
+		return;
+
+	task_lock(task);
+
+	task->maze_context = kzalloc(sizeof(struct maze_context), GFP_ATOMIC);
+	if (unlikely(!task->maze_context)) {
+		ERRPRINT("failed to allocate maze_context.\n");
+		goto task_unlock;
+	}
+	task->maze_context->task = task;
+	task->maze_context->pid = task->pid;
+
+	copy_limit_and_signal(task->maze_context, current->maze_context);
+	preempt_notifier_init(&task->maze_context->notifier, &preempt_ops);
+
+	spin_lock(&maze_list_lock);
+	list_add(&task->maze_context->list, &maze_list);
+	spin_unlock(&maze_list_lock);
+
+task_unlock:
+	task_unlock(task);
+}
+
+/*
+ * This function is called from sys_sched_yield()
+ */
+void maze_sched_yield(struct task_struct *task)
+{
+	if (task->maze_context)
+		reset_maze_count(task->maze_context);
+}
+
+static void soft_limit_event(struct task_struct *task)
+{
+	send_sig(task->maze_context->soft_signal, task, 1);
+	task->maze_context->flags |= MAZE_SENT_SOFT_SIG;
+}
+
+static void hard_limit_event(struct task_struct *task)
+{
+	send_sig(task->maze_context->hard_signal, task, 1);
+	task->maze_context->flags |= MAZE_SENT_HARD_SIG;
+}
+
+static inline void set_preempt_notifier(struct maze_context *context)
+{
+	if (!atomic_xchg(&context->registered_notifier, 1))
+		preempt_notifier_register(&context->notifier);
+}
+
+static int add_maze_context(struct maze_context *context)
+{
+	struct task_struct *task;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	task = find_task_by_pid(context->pid);
+	if (unlikely(!task))
+		goto read_unlock;
+
+	task_lock(task);
+
+	if (!task->maze_context) {
+		task->maze_context = context;
+		context->task = task;
+
+		spin_lock(&maze_list_lock);
+		list_add(&context->list, &maze_list);
+		spin_unlock(&maze_list_lock);
+
+		reset_maze_count(context);
+
+		preempt_notifier_init(&context->notifier, &preempt_ops);
+		if (current == task)
+			set_preempt_notifier(context);
+	} else {
+		copy_limit_and_signal(task->maze_context, context);
+		ret = 1;
+	}
+
+	task_unlock(task);
+read_unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+static int build_maze_context(const char *read_line, struct maze_context *context)
+{
+	long soft_limit, hard_limit;
+	int soft_signal, hard_signal;
+	int pid, res;
+
+	res = sscanf(read_line, "%d %ld %ld %d %d", &pid, &soft_limit,
+				 &hard_limit, &soft_signal, &hard_signal);
+
+	if (res != 5 || pid < 0 ||
+		soft_limit < 0 || hard_limit < 0 ||
+		soft_signal < 0 || hard_signal < 0)
+		return -EINVAL;
+
+	context->pid = pid;
+	context->soft_limit = soft_limit;
+	context->hard_limit = hard_limit;
+	context->soft_signal = soft_signal;
+	context->hard_signal = hard_signal;
+
+	return 0;
+}
+
+static void timer_handler(unsigned long data);
+
+/*
+ * Set up the per-CPU timer (the handler runs in softirq context)
+ */
+static void continue_timer(int cpu)
+{
+	struct timer_list *t;
+
+	t = &per_cpu(maze_timer, cpu);
+	init_timer(t);
+	t->function = timer_handler;
+	t->expires = jiffies + maze_timer_interval;
+	add_timer_on(t, cpu);
+}
+
+static inline void check_limit(struct maze_context *context)
+{
+	cputime_t t = get_maze_count(context);
+
+	if (!(context->flags & MAZE_SENT_SOFT_SIG)) {
+		if (t >= context->soft_limit)
+			soft_limit_event(context->task);
+	} else if (!(context->flags & MAZE_SENT_HARD_SIG)) {
+		if (t >= context->hard_limit)
+			hard_limit_event(context->task);
+	}
+}
+
+static void timer_handler(unsigned long data)
+{
+	struct maze_context *context, *next;
+
+	spin_lock(&__get_cpu_var(maze_queue_lock));
+
+	if (current->maze_context) {
+		set_preempt_notifier(current->maze_context);
+		if (!current->maze_context->enqueue)
+			check_limit(current->maze_context);
+	}
+
+	if (!list_empty(&__get_cpu_var(maze_queue))) {
+		list_for_each_entry_safe (context, next,
+								  &__get_cpu_var(maze_queue), queue) {
+			check_limit(context);
+			list_del(&context->queue);
+			context->enqueue = 0;
+		}
+	}
+
+	spin_unlock(&__get_cpu_var(maze_queue_lock));
+
+	continue_timer(smp_processor_id());
+}
+
+
+static int maze_entries_file_show(struct seq_file *seq, void *nouse)
+{
+	struct maze_context *context;
+
+	spin_lock(&maze_list_lock);
+
+	list_for_each_entry(context, &maze_list, list) {
+		seq_printf(seq, "pid:%5d   ", context->pid);
+		seq_printf(seq, "count:%6ld  ", get_maze_count(context));
+		seq_printf(seq, "soft limit:%6ld  ", context->soft_limit);
+		seq_printf(seq, "hard limit:%6ld  ", context->hard_limit);
+		seq_printf(seq, "soft signal:%2d  ", context->soft_signal);
+		seq_printf(seq, "hard signal:%2d  ", context->hard_signal);
+		seq_printf(seq, "\n");
+	}
+
+	spin_unlock(&maze_list_lock);
+
+	return 0;
+}
+
+/*
+ * Open operation of /proc/maze/entries
+ */
+static int maze_entries_file_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, maze_entries_file_show, NULL);
+}
+
+/*
+ * Release operation of /proc/maze/entries
+ */
+static int maze_entries_file_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+/*
+ * Write operation of /proc/maze/entries
+ */
+static ssize_t maze_entries_file_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *ppos)
+{
+	struct maze_context *context;
+	char read_line[32];
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count > sizeof(read_line) - 1)
+		return -EINVAL;
+
+	if (copy_from_user(read_line, buffer, count))
+		return -EFAULT;
+
+	read_line[count] = '\0';
+
+	context = kzalloc(sizeof(struct maze_context), GFP_ATOMIC);
+	if (unlikely(!context)) {
+		ERRPRINT("failed to allocate maze_context.\n");
+		return -ENOMEM;
+	}
+
+	ret = build_maze_context(read_line, context);
+	if (ret) {
+		kfree(context);
+		return ret;
+	}
+
+	if (add_maze_context(context))
+		kfree(context);
+
+	return count;
+}
+
+static struct file_operations maze_entries_file_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = maze_entries_file_open,
+	.read	 = seq_read,
+	.write   = maze_entries_file_write,
+	.llseek	 = seq_lseek,
+	.release = maze_entries_file_release,
+};
+
+/*
+ * Creating /proc/maze/
+ */
+static inline int init_dir(void)
+{
+	maze_proc_dir =
+		create_proc_entry("maze",
+						  S_IFDIR | S_IRUGO | S_IXUGO,
+						  NULL);
+
+	if (!maze_proc_dir)
+		panic("failed to create /proc/maze\n");
+
+	return 0;
+}
+
+
+/*
+ * Creating /proc/maze/entries
+ */
+static inline int init_entries(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("entries", S_IRUGO, maze_proc_dir);
+	if (p == NULL)
+		panic("failed to create /proc/maze/entries\n");
+
+	p->proc_fops = &maze_entries_file_ops;
+
+	return 0;
+}
+
+/*
+ * Initializing maze procfs
+ */
+static void __init init_proc(void)
+{
+	init_dir();
+	init_entries();
+}
+
+static void __init init_cpu(int cpu)
+{
+	spin_lock_init(&per_cpu(maze_queue_lock, cpu));
+	INIT_LIST_HEAD(&per_cpu(maze_queue, cpu));
+
+	/* Arm the per-CPU monitoring timer */
+	continue_timer(cpu);
+}
+
+static int __init maze_init(void)
+{
+	int cpu;
+
+	printk(KERN_INFO "Maze: Initializing\n");
+	maze_initialized = 1;
+	init_proc();
+
+	spin_lock_init(&maze_list_lock);
+	INIT_LIST_HEAD(&maze_list);
+
+	for_each_online_cpu(cpu)
+		init_cpu(cpu);
+
+	return 0;
+}
+late_initcall(maze_init);
--- linux-2.6.25-mm1-maze.orig/kernel/sched.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/sched.c	2008-05-13 17:52:23.000000000 +0900
@@ -68,6 +68,7 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 #include <linux/ftrace.h>
+#include <linux/maze.h>

 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -5157,6 +5158,8 @@ asmlinkage long sys_sched_yield(void)
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();

+	maze_sched_yield(current);
+
 	schedule();

 	return 0;


