lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 3 Nov 2009 22:11:59 +0800
From:	Wu Fengguang <fengguang.wu@...el.com>
To:	Jens Axboe <jens.axboe@...cle.com>
Cc:	Linux Kernel <linux-kernel@...r.kernel.org>, jack@...e.cz,
	chris.mason@...cle.com
Subject: Re: writeback tracing

Hi Jens,

Thanks, it's a very good step. I'll take advantage of it for writeback
testing/debugging and watch how things work :)

However you didn't plan to submit it soon?  Because tracepoints are
expected to be relatively stable, and you know that writeback is
somehow a hot topic now, hehe.

Thanks,
Fengguang


On Thu, Oct 29, 2009 at 09:55:01AM +0100, Jens Axboe wrote:
> Hi,
> 
> This is a first stab at providing some general writeback tracing
> infrastructure. I'd like some input on whatever trace points you think
> would be interesting. The goal would be to ensure that we never again
> have to do debug printk() patches to trace this stuff.
> 
> Patch is against latest -git.
> 
> commit 607eadcebf9f137ec445cb9883312c821b37a3a7
> Author: Jens Axboe <jens.axboe@...cle.com>
> Date:   Thu Oct 29 10:54:00 2009 +0100
> 
>     writeback: initial tracing support
>     
>     Trace queue/sched/exec parts of the writeback loop.
>     
>     Signed-off-by: Jens Axboe <jens.axboe@...cle.com>
> 
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 9d5360c..056dd07 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -25,7 +25,9 @@
>  #include <linux/blkdev.h>
>  #include <linux/backing-dev.h>
>  #include <linux/buffer_head.h>
> +#include <linux/ftrace.h>
>  #include "internal.h"
> +#include <trace/events/writeback.h>
>  
>  #define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
>  
> @@ -34,33 +36,6 @@
>   */
>  int nr_pdflush_threads;
>  
> -/*
> - * Passed into wb_writeback(), essentially a subset of writeback_control
> - */
> -struct wb_writeback_args {
> -	long nr_pages;
> -	struct super_block *sb;
> -	enum writeback_sync_modes sync_mode;
> -	int for_kupdate:1;
> -	int range_cyclic:1;
> -	int for_background:1;
> -};
> -
> -/*
> - * Work items for the bdi_writeback threads
> - */
> -struct bdi_work {
> -	struct list_head list;		/* pending work list */
> -	struct rcu_head rcu_head;	/* for RCU free/clear of work */
> -
> -	unsigned long seen;		/* threads that have seen this work */
> -	atomic_t pending;		/* number of threads still to do work */
> -
> -	struct wb_writeback_args args;	/* writeback arguments */
> -
> -	unsigned long state;		/* flag bits, see WS_* */
> -};
> -
>  enum {
>  	WS_USED_B = 0,
>  	WS_ONSTACK_B,
> @@ -135,6 +110,8 @@ static void wb_work_complete(struct bdi_work *work)
>  
>  static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
>  {
> +	trace_writeback_clear(work);
> +
>  	/*
>  	 * The caller has retrieved the work arguments from this work,
>  	 * drop our reference. If this is the last ref, delete and free it
> @@ -170,12 +147,16 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
>  	 * If the default thread isn't there, make sure we add it. When
>  	 * it gets created and wakes up, we'll run this work.
>  	 */
> -	if (unlikely(list_empty_careful(&bdi->wb_list)))
> +	if (unlikely(list_empty_careful(&bdi->wb_list))) {
> +		trace_writeback_sched(bdi, work, "default");
>  		wake_up_process(default_backing_dev_info.wb.task);
> -	else {
> +	} else {
>  		struct bdi_writeback *wb = &bdi->wb;
> +		struct task_struct *task = wb->task;
>  
> -		if (wb->task)
> +		trace_writeback_sched(bdi, work, task ? "task" : "notask");
> +
> +		if (task)
>  			wake_up_process(wb->task);
>  	}
>  }
> @@ -202,6 +183,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
>  	work = kmalloc(sizeof(*work), GFP_ATOMIC);
>  	if (work) {
>  		bdi_work_init(work, args);
> +		trace_writeback_queue(bdi, args);
>  		bdi_queue_work(bdi, work);
>  	} else {
>  		struct bdi_writeback *wb = &bdi->wb;
> @@ -235,6 +217,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
>  	bdi_work_init(&work, &args);
>  	work.state |= WS_ONSTACK;
>  
> +	trace_writeback_queue(bdi, &args);
>  	bdi_queue_work(bdi, &work);
>  	bdi_wait_on_work_clear(&work);
>  }
> @@ -901,6 +884,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
>  		if (force_wait)
>  			work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
>  
> +		trace_writeback_exec(work);
> +
>  		/*
>  		 * If this isn't a data integrity operation, just notify
>  		 * that we have seen this work and we are now starting it.
> @@ -936,9 +921,13 @@ int bdi_writeback_task(struct bdi_writeback *wb)
>  	unsigned long wait_jiffies = -1UL;
>  	long pages_written;
>  
> +	trace_writeback_thread_start(1);
> +
>  	while (!kthread_should_stop()) {
>  		pages_written = wb_do_writeback(wb, 0);
>  
> +		trace_writeback_pages_written(pages_written);
> +
>  		if (pages_written)
>  			last_active = jiffies;
>  		else if (wait_jiffies != -1UL) {
> @@ -959,6 +948,8 @@ int bdi_writeback_task(struct bdi_writeback *wb)
>  		try_to_freeze();
>  	}
>  
> +	trace_writeback_thread_start(0);
> +
>  	return 0;
>  }
>  
> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> index 66ebddc..b12a077 100644
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -22,6 +22,33 @@ enum writeback_sync_modes {
>  };
>  
>  /*
> + * Passed into wb_writeback(), essentially a subset of writeback_control
> + */
> +struct wb_writeback_args {
> +	long nr_pages;
> +	struct super_block *sb;
> +	enum writeback_sync_modes sync_mode;
> +	int for_kupdate:1;
> +	int range_cyclic:1;
> +	int for_background:1;
> +};
> +
> +/*
> + * Work items for the bdi_writeback threads
> + */
> +struct bdi_work {
> +	struct list_head list;		/* pending work list */
> +	struct rcu_head rcu_head;	/* for RCU free/clear of work */
> +
> +	unsigned long seen;		/* threads that have seen this work */
> +	atomic_t pending;		/* number of threads still to do work */
> +
> +	struct wb_writeback_args args;	/* writeback arguments */
> +
> +	unsigned long state;		/* flag bits, see WS_* */
> +};
> +
> +/*
>   * A control structure which tells the writeback code what to do.  These are
>   * always on the stack, and hence need no locking.  They are always initialised
>   * in a manner such that unspecified fields are set to zero.
> diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
> new file mode 100644
> index 0000000..d58c75b
> --- /dev/null
> +++ b/include/trace/events/writeback.h
> @@ -0,0 +1,172 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM writeback
> +
> +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_WRITEBACK_H
> +
> +#include <linux/backing-dev.h>
> +#include <linux/writeback.h>
> +
> +TRACE_EVENT(writeback_queue,
> +
> +	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_args *args),
> +
> +	TP_ARGS(bdi, args),
> +
> +	TP_STRUCT__entry(
> +		__array(char,		name,		16)
> +		__field(long,		nr_pages)
> +		__field(int,		sb)
> +		__field(int,		sync_mode)
> +		__field(int,		for_kupdate)
> +		__field(int,		range_cyclic)
> +		__field(int,		for_background)
> +	),
> +
> +	TP_fast_assign(
> +		strncpy(__entry->name, dev_name(bdi->dev), 16);
> +		__entry->nr_pages	= args->nr_pages;
> +		__entry->sb		= !!args->sb;
> +		__entry->for_kupdate	= args->for_kupdate;
> +		__entry->range_cyclic	= args->range_cyclic;
> +		__entry->for_background	= args->for_background;
> +	),
> +
> +	TP_printk("%s: pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d "
> +		  "for_background=%d", __entry->name, __entry->nr_pages,
> +			__entry->sb, __entry->for_kupdate,
> +			__entry->range_cyclic, __entry->for_background)
> +);
> +
> +TRACE_EVENT(writeback_sched,
> +
> +	TP_PROTO(struct backing_dev_info *bdi, struct bdi_work *work,
> +		 const char *msg),
> +
> +	TP_ARGS(bdi, work, msg),
> +
> +	TP_STRUCT__entry(
> +		__array(char,			name,		16)
> +		__field(unsigned int,		work)
> +		__array(char,			task,		8)
> +	),
> +
> +	TP_fast_assign(
> +		strncpy(__entry->name, dev_name(bdi->dev), 16);
> +		__entry->work = (unsigned long) work & 0xffff;
> +		snprintf(__entry->task, 8, "%s", msg);
> +	),
> +
> +	TP_printk("work=%x, task=%s", __entry->work, __entry->task)
> +);
> +
> +TRACE_EVENT(writeback_exec,
> +
> +	TP_PROTO(struct bdi_work *work),
> +
> +	TP_ARGS(work),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned int,	work)
> +		__field(long,		nr_pages)
> +		__field(int,		sb)
> +		__field(int,		sync_mode)
> +		__field(int,		for_kupdate)
> +		__field(int,		range_cyclic)
> +		__field(int,		for_background)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->work		= (unsigned long) work & 0xffff;
> +		__entry->nr_pages	= work->args.nr_pages;
> +		__entry->sb		= !!work->args.sb;
> +		__entry->for_kupdate	= work->args.for_kupdate;
> +		__entry->range_cyclic	= work->args.range_cyclic;
> +		__entry->for_background	= work->args.for_background;
> +
> +	),
> +
> +	TP_printk("work=%x pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d"
> +		  " for_background=%d", __entry->work,
> +			__entry->nr_pages, __entry->sb, __entry->for_kupdate,
> +			__entry->range_cyclic, __entry->for_background)
> +);
> +
> +TRACE_EVENT(writeback_clear,
> +
> +	TP_PROTO(struct bdi_work *work),
> +
> +	TP_ARGS(work),
> +
> +	TP_STRUCT__entry(
> +		__field(struct bdi_work *,	work)
> +		__field(int,			refs)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->work		= work;
> +		__entry->refs		= atomic_read(&work->pending);
> +	),
> +
> +	TP_printk("work=%p, refs=%d", __entry->work, __entry->refs)
> +);
> +
> +TRACE_EVENT(writeback_pages_written,
> +
> +	TP_PROTO(long pages_written),
> +
> +	TP_ARGS(pages_written),
> +
> +	TP_STRUCT__entry(
> +		__field(long,		pages)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pages		= pages_written;
> +	),
> +
> +	TP_printk("%ld", __entry->pages)
> +);
> +
> +
> +TRACE_EVENT(writeback_thread_start,
> +
> +	TP_PROTO(int start),
> +
> +	TP_ARGS(start),
> +
> +	TP_STRUCT__entry(
> +		__field(int,	start)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->start = start;
> +	),
> +
> +	TP_printk("%s", __entry->start ? "started" : "exited")
> +);
> +
> +TRACE_EVENT(writeback_bdi_register,
> +
> +	TP_PROTO(const char *name, int start),
> +
> +	TP_ARGS(name, start),
> +
> +	TP_STRUCT__entry(
> +		__array(char,	name,		16)
> +		__field(int,	start)
> +	),
> +
> +	TP_fast_assign(
> +		strncpy(__entry->name, name, 16);
> +		__entry->start = start;
> +	),
> +
> +	TP_printk("%s: %s", __entry->name,
> +			__entry->start ? "registered" : "unregistered")
> +);
> +
> +#endif /* _TRACE_WRITEBACK_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
> index 5a37e20..4f53a6d 100644
> --- a/mm/backing-dev.c
> +++ b/mm/backing-dev.c
> @@ -11,6 +11,9 @@
>  #include <linux/writeback.h>
>  #include <linux/device.h>
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/writeback.h>
> +
>  void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
>  {
>  }
> @@ -570,6 +573,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
>  
>  	bdi_debug_register(bdi, dev_name(dev));
>  	set_bit(BDI_registered, &bdi->state);
> +	trace_writeback_bdi_register(dev_name(dev), 1);
>  exit:
>  	return ret;
>  }
> @@ -613,6 +617,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
>  void bdi_unregister(struct backing_dev_info *bdi)
>  {
>  	if (bdi->dev) {
> +		trace_writeback_bdi_register(dev_name(bdi->dev), 0);
>  		if (!bdi_cap_flush_forker(bdi))
>  			bdi_wb_shutdown(bdi);
>  		bdi_debug_unregister(bdi);
> 
> -- 
> Jens Axboe
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ