lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20081016221046.GD21877@Krystal>
Date:	Thu, 16 Oct 2008 18:10:46 -0400
From:	Mathieu Desnoyers <compudj@...stal.dyndns.org>
To:	Jason Baron <jbaron@...hat.com>
Cc:	linux-kernel@...r.kernel.org, fche@...hat.com, fweisbec@...il.com,
	edwintorok@...il.com, mingo@...e.hu, peterz@...radead.org
Subject: Re: tracepoints for kernel/mutex.c

* Jason Baron (jbaron@...hat.com) wrote:
> hi,
> 
> Below are 3 tracepoints I've been playing with in kernel/mutex.c using
> a SystemTap script. The idea is to detect and determine the cause of
> lock contention. Currently I get the following output:
> 
> <contended mutex nam> <process name and pid of the contention> <time of
> contention> <pid that woke me up(caused the contention?)>
> 
> for example:
> 
> name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
> 26566
>  
> 
> Using SystemTap I can now modify my script to figure out the inode number, the 
> file and pathname, or the backtrace and get to the bottom of the
> contention.
> 
> I think this approach has a number of advantages. It has low
> overhead in the off case, since its based on tracepoints. It is
> minimally invasive in the code path (3 tracepoints). It also allows me
> to explore data structures and parts of the kernel by simply modifying
> the SystemTap script. I do not need to re-compile the kernel and reboot.
> 
> You can run the script via: $stap mutex.stp
> 
> thanks,
> 
> -Jason
> 
> 
> diff --git a/include/trace/mutex.h b/include/trace/mutex.h
> new file mode 100644
> index 0000000..8d365d6
> --- /dev/null
> +++ b/include/trace/mutex.h
> @@ -0,0 +1,10 @@
> +#ifndef _TRACE_MUTEX_H
> +#define _TRACE_MUTEX_H
> +
> +#include <linux/tracepoint.h>
> +
> +DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
> +DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
> +DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
> +
> +#endif
> diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
> index 1d94160..847a9fb 100644
> --- a/kernel/mutex-debug.c
> +++ b/kernel/mutex-debug.c
> @@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
>  #endif
>  	lock->owner = NULL;
>  	lock->magic = lock;
> +	lock->name = name;
>  }
>  
>  /***
> diff --git a/kernel/mutex.c b/kernel/mutex.c
> index 12c779d..62c2aab 100644
> --- a/kernel/mutex.c
> +++ b/kernel/mutex.c
> @@ -18,6 +18,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/interrupt.h>
>  #include <linux/debug_locks.h>
> +#include <trace/mutex.h>
>  
>  /*
>   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		goto done;
>  
>  	lock_contended(&lock->dep_map, ip);
> +	trace_mutex_lock_contention((int)task->pid, jiffies);
>  
>  	for (;;) {
>  		/*
> @@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		schedule();
>  		spin_lock_mutex(&lock->wait_lock, flags);
>  	}
> -
> +	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
>  done:
>  	lock_acquired(&lock->dep_map);
>  	/* got the lock - rejoice! */
> @@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
>  					   struct mutex_waiter, list);
>  
>  		debug_mutex_wake_waiter(lock, waiter);
> -
> +		trace_mutex_wakeup(waiter->task->pid, current->pid);
>  		wake_up_process(waiter->task);
>  	}
>  
> diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
> index 2ee8a46..0c54d01 100644
> --- a/ltt/probes/Makefile
> +++ b/ltt/probes/Makefile
> @@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
>  endif
>  
>  obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
> -				ipc-trace.o lockdep-trace.o rcu-trace.o
> +				ipc-trace.o lockdep-trace.o rcu-trace.o \
> +				mutex-trace.o
>  
>  ifeq ($(CONFIG_NET),y)
>  ifdef CONFIG_FTRACE
> diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
> new file mode 100644
> index 0000000..3b1ebeb
> --- /dev/null
> +++ b/ltt/probes/mutex-trace.c
> @@ -0,0 +1,43 @@
> +/*
> + * ltt/probes/fs-trace.c
> + *

Perhaps you meant mutex-trace.c ? :)

> + * Mutex tracepoint probes.
> + */
> +
> +#include <linux/module.h>
> +#include <trace/mutex.h>
> +
> +static unsigned long mutex_times[PID_MAX_DEFAULT+1];
> +static int mutex_wakeup[PID_MAX_DEFAULT+1];
> +

What happens if PID_MAX_LIMIT > PID_MAX_DEFAULT ?

What happens when a process ID gets reused (exit/fork reusing a PID) ?

Could you add this information to the task_structs or does it really
have to be separate ?

Mathieu

> +void probe_mutex_lock_contention(int pid, unsigned long time)
> +{
> +	mutex_times[pid] = (unsigned long)time;	
> +
> +	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
> +		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
> +}
> +
> +void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
> +{
> +	unsigned long delay = time - mutex_times[pid];
> +	int reason = mutex_wakeup[pid];
> +
> +	/* FIXME: why is name sometimes null? */
> +	if (!name)
> +		return;
> +
> +	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
> +		probe_mutex_lock_contention_length, 
> +		"name %s pid %d delay %lu cause %d", 
> +		name, pid, delay, reason);
> +}
> +
> +void probe_mutex_wakeup(int target_pid, int sender_pid)
> +{
> +	mutex_wakeup[target_pid] = sender_pid;
> +
> +	trace_mark_tp(mutex_wakeup, mutex_wakeup,
> +		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
> +}
> +
> 
> 
> # mutex.stp
> 
> probe kernel.mark("mutex_lock_contention") 
> { 
> }
> 
> probe kernel.mark("mutex_wakeup") 
> { 
> }
> 
> probe kernel.mark("mutex_lock_contention_length") 
> { 
> 	name = $arg1
> 	pid = $arg2
> 	delay = $arg3
> 	cause = $arg4
> 
> 	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
> 		name, execname(), pid, delay, cause);
> }
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ