lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20111223134854.GA674@quack.suse.cz>
Date:	Fri, 23 Dec 2011 14:48:54 +0100
From:	Jan Kara <jack@...e.cz>
To:	Wu Fengguang <fengguang.wu@...el.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Andi Kleen <andi@...stfloor.org>, Ingo Molnar <mingo@...e.hu>,
	Jens Axboe <axboe@...nel.dk>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Rik van Riel <riel@...hat.com>,
	Linux Memory Management List <linux-mm@...ck.org>,
	"linux-fsdevel@...r.kernel.org" <linux-fsdevel@...r.kernel.org>,
	LKML <linux-kernel@...r.kernel.org>, Jan Kara <jack@...e.cz>,
	Dave Chinner <david@...morbit.com>
Subject: Re: [PATCH 07/10 v2] readahead: add /debug/readahead/stats

On Fri 23-12-11 20:59:12, Wu Fengguang wrote:
> The accounting code will be compiled in by default (CONFIG_READAHEAD_STATS=y),
> and will remain inactive by default.
> 
> It can be enabled/disabled at runtime through the debugfs interface (assuming debugfs is mounted at /debug):
> 
> 	echo 1 > /debug/readahead/stats_enable
> 	echo 0 > /debug/readahead/stats_enable
> 
> Example output:
> (taken from a freshly booted NFS-ROOT console box with rsize=524288)
> 
> $ cat /debug/readahead/stats
> pattern     readahead    eof_hit  cache_hit         io    sync_io    mmap_io    meta_io       size async_size    io_size
> initial           702        511          0        692        692          0          0          2          0          2
> subsequent          7          0          1          7          1          1          0         23         22         23
> context           160        161          0          2          0          1          0          0          0         16
> around            184        184        177        184        184        184          0         58          0         53
> backwards           2          0          2          2          2          0          0          4          0          3
> fadvise          2593         47          8       2588       2588          0          0          1          0          1
> oversize            0          0          0          0          0          0          0          0          0          0
> random             45         20          0         44         44          0          0          1          0          1
> all              3697        923        188       3519       3511        186          0          4          0          4
> 
> The two most important columns are:
> - io		number of readahead IO
> - io_size	average readahead IO size
> 
> CC: Ingo Molnar <mingo@...e.hu>
> CC: Jens Axboe <axboe@...nel.dk>
> CC: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> Acked-by: Rik van Riel <riel@...hat.com>
> Signed-off-by: Wu Fengguang <fengguang.wu@...el.com>
  Looks good to me.

  Acked-by: Jan Kara <jack@...e.cz>

								Honza

> ---
>  mm/Kconfig     |   15 +++
>  mm/readahead.c |  202 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 217 insertions(+)
> 
> This switches to the percpu_counter facilities.
> 
> --- linux-next.orig/mm/readahead.c	2011-12-23 20:29:14.000000000 +0800
> +++ linux-next/mm/readahead.c	2011-12-23 20:50:04.000000000 +0800
> @@ -33,6 +33,202 @@ EXPORT_SYMBOL_GPL(file_ra_state_init);
>  
>  #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
>  
> +#ifdef CONFIG_READAHEAD_STATS
> +#include <linux/ftrace_event.h>
> +#include <linux/seq_file.h>
> +#include <linux/debugfs.h>
> +
> +static u32 readahead_stats_enable __read_mostly;
> +
> +static const struct trace_print_flags ra_pattern_names[] = {
> +	READAHEAD_PATTERNS
> +};
> +
> +enum ra_account {
> +	/* number of readaheads */
> +	RA_ACCOUNT_COUNT,	/* readahead request */
> +	RA_ACCOUNT_EOF,		/* readahead request covers EOF */
> +	RA_ACCOUNT_CACHE_HIT,	/* readahead request covers some cached pages */
> +	RA_ACCOUNT_IOCOUNT,	/* readahead IO */
> +	RA_ACCOUNT_SYNC,	/* readahead IO that is synchronous */
> +	RA_ACCOUNT_MMAP,	/* readahead IO by mmap page faults */
> +	RA_ACCOUNT_METADATA,	/* readahead IO on metadata */
> +	/* number of readahead pages */
> +	RA_ACCOUNT_SIZE,	/* readahead size */
> +	RA_ACCOUNT_ASYNC_SIZE,	/* readahead async size */
> +	RA_ACCOUNT_ACTUAL,	/* readahead actual IO size */
> +	/* end mark */
> +	RA_ACCOUNT_MAX,
> +};
> +
> +#define RA_STAT_BATCH	(INT_MAX / 2)
> +static struct percpu_counter ra_stat[RA_PATTERN_ALL][RA_ACCOUNT_MAX];
> +
> +static inline void add_ra_stat(int i, int j, s64 amount)
> +{
> +	__percpu_counter_add(&ra_stat[i][j], amount, RA_STAT_BATCH);
> +}
> +
> +static inline void inc_ra_stat(int i, int j)
> +{
> +	add_ra_stat(i, j, 1);
> +}
> +
> +static void readahead_stats(struct address_space *mapping,
> +			    pgoff_t offset,
> +			    unsigned long req_size,
> +			    bool for_mmap,
> +			    bool for_metadata,
> +			    enum readahead_pattern pattern,
> +			    pgoff_t start,
> +			    unsigned long size,
> +			    unsigned long async_size,
> +			    int actual)
> +{
> +	pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1;
> +
> +	inc_ra_stat(pattern, RA_ACCOUNT_COUNT);
> +	add_ra_stat(pattern, RA_ACCOUNT_SIZE, size);
> +	add_ra_stat(pattern, RA_ACCOUNT_ASYNC_SIZE, async_size);
> +	add_ra_stat(pattern, RA_ACCOUNT_ACTUAL, actual);
> +
> +	if (start + size >= eof)
> +		inc_ra_stat(pattern, RA_ACCOUNT_EOF);
> +	if (actual < size)
> +		inc_ra_stat(pattern, RA_ACCOUNT_CACHE_HIT);
> +
> +	if (actual) {
> +		inc_ra_stat(pattern, RA_ACCOUNT_IOCOUNT);
> +
> +		if (start <= offset && offset < start + size)
> +			inc_ra_stat(pattern, RA_ACCOUNT_SYNC);
> +
> +		if (for_mmap)
> +			inc_ra_stat(pattern, RA_ACCOUNT_MMAP);
> +		if (for_metadata)
> +			inc_ra_stat(pattern, RA_ACCOUNT_METADATA);
> +	}
> +}
> +
> +static void readahead_stats_reset(void)
> +{
> +	int i, j;
> +
> +	for (i = 0; i < RA_PATTERN_ALL; i++)
> +		for (j = 0; j < RA_ACCOUNT_MAX; j++)
> +			percpu_counter_set(&ra_stat[i][j], 0);
> +}
> +
> +static void
> +readahead_stats_sum(long long ra_stats[RA_PATTERN_MAX][RA_ACCOUNT_MAX])
> +{
> +	int i, j;
> +
> +	for (i = 0; i < RA_PATTERN_ALL; i++)
> +		for (j = 0; j < RA_ACCOUNT_MAX; j++) {
> +			s64 n = percpu_counter_sum(&ra_stat[i][j]);
> +			ra_stats[i][j] += n;
> +			ra_stats[RA_PATTERN_ALL][j] += n;
> +		}
> +}
> +
> +static int readahead_stats_show(struct seq_file *s, void *_)
> +{
> +	long long ra_stats[RA_PATTERN_MAX][RA_ACCOUNT_MAX];
> +	int i;
> +
> +	seq_printf(s,
> +		   "%-10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
> +		   "pattern", "readahead", "eof_hit", "cache_hit",
> +		   "io", "sync_io", "mmap_io", "meta_io",
> +		   "size", "async_size", "io_size");
> +
> +	memset(ra_stats, 0, sizeof(ra_stats));
> +	readahead_stats_sum(ra_stats);
> +
> +	for (i = 0; i < RA_PATTERN_MAX; i++) {
> +		unsigned long count = ra_stats[i][RA_ACCOUNT_COUNT];
> +		unsigned long iocount = ra_stats[i][RA_ACCOUNT_IOCOUNT];
> +		/*
> +		 * avoid division-by-zero
> +		 */
> +		if (count == 0)
> +			count = 1;
> +		if (iocount == 0)
> +			iocount = 1;
> +
> +		seq_printf(s, "%-10s %10lld %10lld %10lld %10lld %10lld "
> +			   "%10lld %10lld %10lld %10lld %10lld\n",
> +				ra_pattern_names[i].name,
> +				ra_stats[i][RA_ACCOUNT_COUNT],
> +				ra_stats[i][RA_ACCOUNT_EOF],
> +				ra_stats[i][RA_ACCOUNT_CACHE_HIT],
> +				ra_stats[i][RA_ACCOUNT_IOCOUNT],
> +				ra_stats[i][RA_ACCOUNT_SYNC],
> +				ra_stats[i][RA_ACCOUNT_MMAP],
> +				ra_stats[i][RA_ACCOUNT_METADATA],
> +				ra_stats[i][RA_ACCOUNT_SIZE] / count,
> +				ra_stats[i][RA_ACCOUNT_ASYNC_SIZE] / count,
> +				ra_stats[i][RA_ACCOUNT_ACTUAL] / iocount);
> +	}
> +
> +	return 0;
> +}
> +
> +static int readahead_stats_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, readahead_stats_show, NULL);
> +}
> +
> +static ssize_t readahead_stats_write(struct file *file, const char __user *buf,
> +				     size_t size, loff_t *offset)
> +{
> +	readahead_stats_reset();
> +	return size;
> +}
> +
> +static const struct file_operations readahead_stats_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= readahead_stats_open,
> +	.write		= readahead_stats_write,
> +	.read		= seq_read,
> +	.llseek		= seq_lseek,
> +	.release	= single_release,
> +};
> +
> +static int __init readahead_create_debugfs(void)
> +{
> +	struct dentry *root;
> +	struct dentry *entry;
> +	int i, j;
> +
> +	root = debugfs_create_dir("readahead", NULL);
> +	if (!root)
> +		goto out;
> +
> +	entry = debugfs_create_file("stats", 0644, root,
> +				    NULL, &readahead_stats_fops);
> +	if (!entry)
> +		goto out;
> +
> +	entry = debugfs_create_bool("stats_enable", 0644, root,
> +				    &readahead_stats_enable);
> +	if (!entry)
> +		goto out;
> +
> +	for (i = 0; i < RA_PATTERN_ALL; i++)
> +		for (j = 0; j < RA_ACCOUNT_MAX; j++)
> +			percpu_counter_init(&ra_stat[i][j], 0);
> +
> +	return 0;
> +out:
> +	printk(KERN_ERR "readahead: failed to create debugfs entries\n");
> +	return -ENOMEM;
> +}
> +
> +late_initcall(readahead_create_debugfs);
> +#endif
> +
>  static inline void readahead_event(struct address_space *mapping,
>  				   pgoff_t offset,
>  				   unsigned long req_size,
> @@ -44,6 +240,12 @@ static inline void readahead_event(struc
>  				   unsigned long async_size,
>  				   int actual)
>  {
> +#ifdef CONFIG_READAHEAD_STATS
> +	if (readahead_stats_enable)
> +		readahead_stats(mapping, offset, req_size,
> +				for_mmap, for_metadata,
> +				pattern, start, size, async_size, actual);
> +#endif
>  	trace_readahead(mapping, offset, req_size,
>  			pattern, start, size, async_size, actual);
>  }
> --- linux-next.orig/mm/Kconfig	2011-12-23 20:28:06.000000000 +0800
> +++ linux-next/mm/Kconfig	2011-12-23 20:29:31.000000000 +0800
> @@ -396,3 +396,18 @@ config FRONTSWAP
>  	  and swap data is stored as normal on the matching swap device.
>  
>  	  If unsure, say Y to enable frontswap.
> +
> +config READAHEAD_STATS
> +	bool "Collect page cache readahead stats"
> +	depends on DEBUG_FS
> +	default y
> +	help
> +	  This provides accounting facilities for readahead events.
> +
> +	  To do readahead accounting for a workload:
> +
> +	  echo 1 > /sys/kernel/debug/readahead/stats_enable
> +	  echo 0 > /sys/kernel/debug/readahead/stats  # reset counters
> +	  # run the workload
> +	  cat /sys/kernel/debug/readahead/stats       # check counters
> +	  echo 0 > /sys/kernel/debug/readahead/stats_enable
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ