lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20110311115235.3663792c.kamezawa.hiroyu@jp.fujitsu.com>
Date:	Fri, 11 Mar 2011 11:52:35 +0900
From:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
To:	Vivek Goyal <vgoyal@...hat.com>
Cc:	Dave Chinner <david@...morbit.com>,
	Chris Mason <chris.mason@...cle.com>,
	Andreas Dilger <adilger@...ger.ca>,
	Justin TerAvest <teravest@...gle.com>,
	m-ikeda <m-ikeda@...jp.nec.com>, jaxboe <jaxboe@...ionio.com>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	ryov <ryov@...inux.co.jp>, taka <taka@...inux.co.jp>,
	"righi.andrea" <righi.andrea@...il.com>,
	guijianfeng <guijianfeng@...fujitsu.com>,
	balbir <balbir@...ux.vnet.ibm.com>,
	ctalbott <ctalbott@...gle.com>, nauman <nauman@...gle.com>,
	mrubin <mrubin@...gle.com>,
	linux-fsdevel <linux-fsdevel@...r.kernel.org>
Subject: Re: [RFC] Storing cgroup id in page->private (Was: Re: [RFC] [PATCH
 0/6] Provide cgroup isolation for buffered writes.)

On Thu, 10 Mar 2011 21:15:31 -0500
Vivek Goyal <vgoyal@...hat.com> wrote:
 
> > IMO, if you really need some per-page information, then just put it
> > in the struct page - you can't hide the memory overhead just by
> > having the filesystem to store it for you. That just adds
> > unnecessary complexity...
> 
> Ok. I guess adding anything to struct page is going to be hard and 
> we might have to fall back to looking into using page_cgroup for
> tracking page state. I was trying to explore the ways so that we don't
> have to instantiate whole page_cgroup structure just for trying
> to figure out who dirtied the page.
> 

Is this bad ?
==

When handling async I/O in the blkio layer, it is unknown who dirtied the page.
This lack of information makes it impossible to throttle async I/O per
cgroup in the blkio queue layer.

This patch records that information in the radix tree and preserves
it. There is no 'clear' operation because all I/O starts when
the page is marked as DIRTY.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
---
 include/linux/radix-tree.h |    3 +++
 lib/radix-tree.c           |   42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

Index: mmotm-Mar10/include/linux/radix-tree.h
===================================================================
--- mmotm-Mar10.orig/include/linux/radix-tree.h
+++ mmotm-Mar10/include/linux/radix-tree.h
@@ -58,12 +58,14 @@ struct radix_tree_root {
 	unsigned int		height;
 	gfp_t			gfp_mask;
 	struct radix_tree_node	__rcu *rnode;
+	int			iohint;
 };
 
 #define RADIX_TREE_INIT(mask)	{					\
 	.height = 0,							\
 	.gfp_mask = (mask),						\
 	.rnode = NULL,							\
+	.iohint = 0,							\
 }
 
 #define RADIX_TREE(name, mask) \
@@ -74,6 +76,7 @@ do {									\
 	(root)->height = 0;						\
 	(root)->gfp_mask = (mask);					\
 	(root)->rnode = NULL;						\
+	(root)->iohint = 0;						\
 } while (0)
 
 /**
Index: mmotm-Mar10/lib/radix-tree.c
===================================================================
--- mmotm-Mar10.orig/lib/radix-tree.c
+++ mmotm-Mar10/lib/radix-tree.c
@@ -31,6 +31,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
+#include <linux/blkdev.h>
 
 
 #ifdef __KERNEL__
@@ -51,6 +52,9 @@ struct radix_tree_node {
 	struct rcu_head	rcu_head;
 	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
+#ifdef CONFIG_BLK_CGROUP
+	unsigned short  iohint[RADIX_TREE_MAP_SIZE];
+#endif
 };
 
 struct radix_tree_path {
@@ -473,6 +477,8 @@ void *radix_tree_tag_set(struct radix_tr
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 		if (!tag_get(slot, tag, offset))
 			tag_set(slot, tag, offset);
+		if (height == 1 && slot && tag == PAGECACHE_TAG_DIRTY)
+			blkio_record_hint(&slot->iohint[offset]);
 		slot = slot->slots[offset];
 		BUG_ON(slot == NULL);
 		shift -= RADIX_TREE_MAP_SHIFT;
@@ -1418,3 +1425,38 @@ void __init radix_tree_init(void)
 	radix_tree_init_maxindex();
 	hotcpu_notifier(radix_tree_callback, 0);
 }
+
+#ifdef CONFIG_BLK_CGROUP
+/* Return the iohint stored for @index under @root, or 0 if unreachable. */
+unsigned short radix_tree_lookup_iohint(struct radix_tree_root *root,
+				int index)
+{
+	unsigned int height, shift;
+	struct radix_tree_node *node;
+
+	node = rcu_dereference(root->rnode);
+	if (node == NULL)
+		return 0;	/* empty tree */
+	if (!radix_tree_is_indirect_ptr(node))
+		return root->iohint;	/* direct entry: use the root's hint */
+	node = radix_tree_indirect_to_ptr(node); /* NOTE(review): confirm name */
+
+	height = node->height;
+	if (index > radix_tree_maxindex(height))
+		return 0;	/* index beyond what this height covers */
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	for ( ; ; ) {
+		int offset;
+
+		if (node == NULL)
+			return 0;	/* hole on the path to @index */
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		if (height == 1)
+			return node->iohint[offset];	/* bottom level */
+		node = rcu_dereference(node->slots[offset]);
+		shift -= RADIX_TREE_MAP_SHIFT;
+		height--;
+	}
+}
+EXPORT_SYMBOL(radix_tree_lookup_iohint);
+#endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ