lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20081016210430.GC25738@redhat.com>
Date:	Thu, 16 Oct 2008 17:04:30 -0400
From:	Jason Baron <jbaron@...hat.com>
To:	linux-kernel@...r.kernel.org
Cc:	compudj@...stal.dyndns.org, fche@...hat.com, fweisbec@...il.com,
	edwintorok@...il.com, mingo@...e.hu, peterz@...radead.org
Subject: tracepoints for kernel/mutex.c

hi,

Below are 3 tracepoints I've been playing with in kernel/mutex.c using
a SystemTap script. The idea is to detect and determine the cause of
lock contention. Currently I get the following output:

<contended mutex nam> <process name and pid of the contention> <time of
contention> <pid that woke me up(caused the contention?)>

for example:

name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
26566
 

Using SystemTap I can now modify my script to figure out the inode number, the 
file and pathname, or the backtrace and get to the bottom of the
contention.

I think this approach has a number of advantages. It has low
overhead in the off case, since its based on tracepoints. It is
minimally invasive in the code path (3 tracepoints). It also allows me
to explore data structures and parts of the kernel by simply modifying
the SystemTap script. I do not need to re-compile the kernel and reboot.

You can run the script via: $stap mutex.stp

thanks,

-Jason


diff --git a/include/trace/mutex.h b/include/trace/mutex.h
new file mode 100644
index 0000000..8d365d6
--- /dev/null
+++ b/include/trace/mutex.h
@@ -0,0 +1,10 @@
+#ifndef _TRACE_MUTEX_H
+#define _TRACE_MUTEX_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
+DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
+DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
+
+#endif
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160..847a9fb 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
 #endif
 	lock->owner = NULL;
 	lock->magic = lock;
+	lock->name = name;
 }
 
 /***
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779d..62c2aab 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <trace/mutex.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		goto done;
 
 	lock_contended(&lock->dep_map, ip);
+	trace_mutex_lock_contention((int)task->pid, jiffies);
 
 	for (;;) {
 		/*
@@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		schedule();
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
-
+	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
 done:
 	lock_acquired(&lock->dep_map);
 	/* got the lock - rejoice! */
@@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-
+		trace_mutex_wakeup(waiter->task->pid, current->pid);
 		wake_up_process(waiter->task);
 	}
 
diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
index 2ee8a46..0c54d01 100644
--- a/ltt/probes/Makefile
+++ b/ltt/probes/Makefile
@@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
 endif
 
 obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
-				ipc-trace.o lockdep-trace.o rcu-trace.o
+				ipc-trace.o lockdep-trace.o rcu-trace.o \
+				mutex-trace.o
 
 ifeq ($(CONFIG_NET),y)
 ifdef CONFIG_FTRACE
diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
new file mode 100644
index 0000000..3b1ebeb
--- /dev/null
+++ b/ltt/probes/mutex-trace.c
@@ -0,0 +1,43 @@
+/*
+ * ltt/probes/fs-trace.c
+ *
+ * Mutex tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <trace/mutex.h>
+
+static unsigned long mutex_times[PID_MAX_DEFAULT+1];
+static int mutex_wakeup[PID_MAX_DEFAULT+1];
+
+void probe_mutex_lock_contention(int pid, unsigned long time)
+{
+	mutex_times[pid] = (unsigned long)time;	
+
+	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
+		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
+}
+
+void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
+{
+	unsigned long delay = time - mutex_times[pid];
+	int reason = mutex_wakeup[pid];
+
+	/* FIXME: why is name sometimes null? */
+	if (!name)
+		return;
+
+	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
+		probe_mutex_lock_contention_length, 
+		"name %s pid %d delay %lu cause %d", 
+		name, pid, delay, reason);
+}
+
+void probe_mutex_wakeup(int target_pid, int sender_pid)
+{
+	mutex_wakeup[target_pid] = sender_pid;
+
+	trace_mark_tp(mutex_wakeup, mutex_wakeup,
+		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
+}
+


# mutex.stp

probe kernel.mark("mutex_lock_contention") 
{ 
}

probe kernel.mark("mutex_wakeup") 
{ 
}

probe kernel.mark("mutex_lock_contention_length") 
{ 
	name = $arg1
	pid = $arg2
	delay = $arg3
	cause = $arg4

	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
		name, execname(), pid, delay, cause);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ