Message-Id: <20100924181550.d1757901.kamezawa.hiroyu@jp.fujitsu.com>
Date:	Fri, 24 Sep 2010 18:15:50 +0900
From:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
To:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
Cc:	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"balbir@...ux.vnet.ibm.com" <balbir@...ux.vnet.ibm.com>,
	"nishimura@....nes.nec.co.jp" <nishimura@....nes.nec.co.jp>,
	"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>
Subject: [RFC][PATCH 1/2] memcg: special ID lookup routine

From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>

It seems the previous patches were not welcomed, so this is a revised one.
My purpose is to replace pc->mem_cgroup with pc->mem_cgroup_id, and to avoid
using more memory when pc->blkio_cgroup_id is added.

As a first step, this patch implements a lookup table from ID to mem_cgroup.
For the usual lookup, css_lookup() works well enough, but it may have to
walk several levels of the idr radix tree. The memory cgroup ID limit is
65536, and as far as I hear, there are users who run 2000+ memory cgroups
on a single system. And with a generic RCU-based lookup routine, the caller
has to follow one of these patterns:

Type A:
	rcu_read_lock()
	obj = obj_lookup()
	atomic_inc(obj->refcnt)
	rcu_read_unlock()
	/* do jobs */
Type B:
	rcu_read_lock()
	obj = rcu_lookup()
	/* do jobs */
	rcu_read_unlock()

often while holding some spinlock. (Type A is very bad in a busy path, and
even Type B has to check whether the object is still alive; that is not
free.) This is complicated.
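
For illustration, here is a minimal sketch of what the two patterns look
like in practice. The object type and obj_lookup() are hypothetical
stand-ins for any refcounted, RCU-looked-up kernel object, not a real API:

	struct obj {
		atomic_t refcnt;
		bool dead;	/* set while the object is torn down */
	};

	/* Type A: pin the object so it outlives the RCU section. */
	static struct obj *get_obj_type_a(int id)
	{
		struct obj *o;

		rcu_read_lock();
		o = obj_lookup(id);
		if (o && !atomic_inc_not_zero(&o->refcnt))
			o = NULL;	/* found, but already dying */
		rcu_read_unlock();
		return o;	/* caller does jobs, then drops the refcnt */
	}

	/* Type B: do the whole job inside the RCU section. */
	static void use_obj_type_b(int id)
	{
		struct obj *o;

		rcu_read_lock();
		o = obj_lookup(id);
		if (o && !o->dead) {
			/* do jobs; RCU keeps the memory valid until unlock */
		}
		rcu_read_unlock();
	}

Either way, the hot path pays for an atomic operation or a liveness check
on top of the radix-tree walk itself.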

Because the page_cgroup -> mem_cgroup information is required at every LRU
operation, I think it is worth adding a special lookup routine to reduce the
cache footprint; with some limitations on the callers, the lookup routine
can even be RCU-free.
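
To make the trade-off concrete: the table is a flat array of 65536 pointers,
i.e. 65536 * 8 bytes = 512KB on 64-bit, and a lookup is a single indexed
load with no lock, refcount, or RCU section. A hypothetical caller on the
LRU path, following the pc->mem_cgroup_id direction described above (the
helper and the field are illustrative, not part of this patch), could look
like:

	static inline struct mem_cgroup *
	page_cgroup_memcg(struct page_cgroup *pc)
	{
		/*
		 * Single array load; valid because a memcg with live
		 * page_cgroups cannot have reached destroy() yet.
		 */
		return memcg_lookup(pc->mem_cgroup_id);
	}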

Note:
 - memcg_lookup() is defined but not used here; it is called in patch 2/2.

Changelog:
 - no hooks to cgroup.
 - no limitation of the number of memcg.
 - delay table allocation until memory cgroup is really used.
 - No RCU routine (depends on the newly added limitation on the callers).

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
---
 mm/memcontrol.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

Index: mmotm-0922/mm/memcontrol.c
===================================================================
--- mmotm-0922.orig/mm/memcontrol.c
+++ mmotm-0922/mm/memcontrol.c
@@ -198,6 +198,7 @@ static void mem_cgroup_oom_notify(struct
  */
 struct mem_cgroup {
 	struct cgroup_subsys_state css;
+	bool	cached;
 	/*
 	 * the counter to account for memory usage
 	 */
@@ -352,6 +353,65 @@ static void mem_cgroup_put(struct mem_cg
 static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 static void drain_all_stock_async(void);
 
+#define MEMCG_ARRAY_SIZE	(sizeof(struct mem_cgroup *) *(65536))
+struct mem_cgroup **memcg_array __read_mostly;
+DEFINE_SPINLOCK(memcg_array_lock);
+
+/*
+ * A quick lookup routine for memory cgroup via ID. This can be used
+ * until destroy() is called against the memory cgroup; until then, in
+ * most cases, there are page_cgroups or tasks which point to the memcg.
+ * So, this cannot be used for swap_cgroup references.
+ */
+static struct mem_cgroup *memcg_lookup(int id)
+{
+	if (id == 0)
+		return NULL;
+	if (id == 1)
+		return root_mem_cgroup;
+	return *(memcg_array + id);
+}
+
+static void memcg_lookup_set(struct mem_cgroup *mem)
+{
+	int id;
+
+	if (likely(mem->cached) || mem == root_mem_cgroup)
+		return;
+	id = css_id(&mem->css);
+	/* Races with other "set" callers; avoid taking a double refcnt */
+	spin_lock(&memcg_array_lock);
+	if (!(*(memcg_array + id))) {
+		mem_cgroup_get(mem);
+		*(memcg_array + id) = mem;
+		mem->cached = true;
+	}
+	spin_unlock(&memcg_array_lock);
+}
+
+static void memcg_lookup_clear(struct mem_cgroup *mem)
+{
+	int id = css_id(&mem->css);
+	/* No race with other lookup/set/clear callers here */
+	*(memcg_array + id) = NULL;
+	mem_cgroup_put(mem);
+}
+
+static int init_mem_cgroup_lookup_array(void)
+{
+	int size;
+
+	if (memcg_array)
+		return 0;
+
+	size = MEMCG_ARRAY_SIZE;
+	memcg_array = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+				PAGE_KERNEL);
+	if (!memcg_array)
+		return -ENOMEM;
+
+	return 0;
+}
 
 static struct mem_cgroup_per_zone *
 mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
@@ -2096,6 +2156,7 @@ static void __mem_cgroup_commit_charge(s
 		mem_cgroup_cancel_charge(mem);
 		return;
 	}
+	memcg_lookup_set(mem);
 
 	pc->mem_cgroup = mem;
 	/*
@@ -4341,6 +4402,10 @@ mem_cgroup_create(struct cgroup_subsys *
 		}
 		hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	} else {
+		/* Lookup array allocation is delayed until a cgroup is created */
+		error = init_mem_cgroup_lookup_array();
+		if (error == -ENOMEM)
+			goto free_out;
 		parent = mem_cgroup_from_cont(cont->parent);
 		mem->use_hierarchy = parent->use_hierarchy;
 		mem->oom_kill_disable = parent->oom_kill_disable;
@@ -4389,6 +4454,8 @@ static void mem_cgroup_destroy(struct cg
 {
 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
 
+	memcg_lookup_clear(mem);
+
 	mem_cgroup_put(mem);
 }
 

