Message-Id: <20080819173721.750d489e.kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 19 Aug 2008 17:37:21 +0900
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
"balbir@...ux.vnet.ibm.com" <balbir@...ux.vnet.ibm.com>,
"yamamoto@...inux.co.jp" <yamamoto@...inux.co.jp>,
"nishimura@....nes.nec.co.jp" <nishimura@....nes.nec.co.jp>,
ryov@...inux.co.jp
Subject: [PATCH -mm][preview] memcg: a patch series for next [1/9]

Replace res_counter with a new mem_counter to do complex counting.
This patch is for the mem+swap controller.
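
For reference, the counting interface changes as follows; all internal
accounting is now in pages rather than bytes, while the user-visible
files still report bytes:

  res_counter_charge(&mem->res, PAGE_SIZE)    -> mem_counter_charge(mem, 1)
  res_counter_uncharge(&mem->res, PAGE_SIZE)  -> mem_counter_uncharge(mem, 1)
  res_counter_set_limit(&mem->res, val)       -> mem_counter_set_pages_limit(mem, val >> PAGE_SHIFT)
  res_counter_check_under_limit(&mem->res)    -> __mem_counter_check_under_limit(mem)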
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
---
mm/memcontrol.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 139 insertions(+), 21 deletions(-)
Index: linux-2.6.27-rc1-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.27-rc1-mm1.orig/mm/memcontrol.c
+++ linux-2.6.27-rc1-mm1/mm/memcontrol.c
@@ -116,12 +116,20 @@ struct mem_cgroup_lru_info {
* no reclaim occurs from a cgroup at its low water mark, this is
* a feature that will be implemented much later in the future.
*/
+struct mem_counter {
+ unsigned long pages_limit; /* limit on usage, in pages */
+ unsigned long pages; /* current usage, in pages */
+ unsigned long failcnt; /* number of failed charge attempts */
+ unsigned long max_usage; /* usage high-watermark, in pages */
+ spinlock_t lock; /* protects all fields above */
+};
+
struct mem_cgroup {
struct cgroup_subsys_state css;
/*
* the counter to account for memory usage
*/
- struct res_counter res;
+ struct mem_counter res;
/*
* Per cgroup active and inactive list, similar to the
* per zone LRU lists.
@@ -181,6 +189,16 @@ enum charge_type {
MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
};
+/* private file IDs for memcg, stored in cftype->private */
+enum {
+ MEMCG_FILE_TYPE_PAGE_LIMIT,
+ MEMCG_FILE_TYPE_PAGE_USAGE,
+ MEMCG_FILE_TYPE_FAILCNT,
+ MEMCG_FILE_TYPE_MAX_USAGE,
+};
+
/*
* Always modified under lru lock. Then, not necessary to preempt_disable()
*/
@@ -279,6 +297,74 @@ static void unlock_page_cgroup(struct pa
bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
+/*
+ * Counter for memory resource accounting, in pages.
+ */
+static void mem_counter_init(struct mem_cgroup *memcg)
+{
+ spin_lock_init(&memcg->res.lock);
+ memcg->res.pages = 0;
+ memcg->res.pages_limit = ~0UL;
+ memcg->res.failcnt = 0;
+ memcg->res.max_usage = 0;
+}
+
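+/*
+ * Charge @num pages to @memcg. Returns 0 on success and updates
+ * max_usage, or -EBUSY (bumping failcnt) if the charge would push
+ * usage above pages_limit.
+ */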
+static int mem_counter_charge(struct mem_cgroup *memcg, long num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memcg->res.lock, flags);
+ if (memcg->res.pages + num > memcg->res.pages_limit) {
+ memcg->res.failcnt++;
+ spin_unlock_irqrestore(&memcg->res.lock, flags);
+ return -EBUSY;
+ }
+ memcg->res.pages += num;
+ if (memcg->res.pages > memcg->res.max_usage)
+ memcg->res.max_usage = memcg->res.pages;
+ spin_unlock_irqrestore(&memcg->res.lock, flags);
+ return 0;
+}
+
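+/* Uncharge @num pages from @memcg. Usage must never underflow. */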
+static inline void mem_counter_uncharge(struct mem_cgroup *memcg, long num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&memcg->res.lock, flags);
+ BUG_ON(memcg->res.pages < num);
+ memcg->res.pages -= num;
+ spin_unlock_irqrestore(&memcg->res.lock, flags);
+}
+
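+/*
+ * Set a new limit, in pages. Returns -EBUSY if current usage already
+ * exceeds the new limit; the caller is expected to reclaim and retry.
+ */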
+static int mem_counter_set_pages_limit(struct mem_cgroup *memcg,
+ unsigned long lim)
+{
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ spin_lock_irqsave(&memcg->res.lock, flags);
+ if (memcg->res.pages <= lim) {
+ memcg->res.pages_limit = lim;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&memcg->res.lock, flags);
+
+ return ret;
+}
+
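+/* Returns 1 if usage is strictly below the limit, 0 otherwise. */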
+static int __mem_counter_check_under_limit(struct mem_cgroup *memcg)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&memcg->res.lock, flags);
+ if (memcg->res.pages < memcg->res.pages_limit)
+ ret = 1;
+ spin_unlock_irqrestore(&memcg->res.lock, flags);
+
+ return ret;
+}
+
static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
struct page_cgroup *pc)
{
@@ -402,7 +488,7 @@ int mem_cgroup_calc_mapped_ratio(struct
- * usage is recorded in bytes. But, here, we assume the number of
- * physical pages can be represented by "long" on any arch.
+ * usage is recorded in pages, which we assume can be represented
+ * by "long" on any arch.
*/
- total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L;
+ total = (long) mem->res.pages + 1L;
rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
return (int)((rss * 100L) / total);
}
@@ -544,7 +630,7 @@ static int mem_cgroup_charge_common(stru
css_get(&memcg->css);
}
- while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+ while (mem_counter_charge(mem, 1)) {
if (!(gfp_mask & __GFP_WAIT))
goto out;
@@ -558,7 +644,7 @@ static int mem_cgroup_charge_common(stru
* Check the limit again to see if the reclaim reduced the
* current usage of the cgroup before giving up
*/
- if (res_counter_check_under_limit(&mem->res))
+ if (__mem_counter_check_under_limit(mem))
continue;
if (!nr_retries--) {
@@ -585,7 +671,7 @@ static int mem_cgroup_charge_common(stru
lock_page_cgroup(page);
if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
- res_counter_uncharge(&mem->res, PAGE_SIZE);
+ mem_counter_uncharge(mem, 1);
css_put(&mem->css);
kmem_cache_free(page_cgroup_cache, pc);
goto done;
@@ -701,7 +787,7 @@ __mem_cgroup_uncharge_common(struct page
unlock_page_cgroup(page);
mem = pc->mem_cgroup;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
+ mem_counter_uncharge(mem, 1);
css_put(&mem->css);
kmem_cache_free(page_cgroup_cache, pc);
@@ -807,8 +893,9 @@ int mem_cgroup_resize_limit(struct mem_c
int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
int progress;
int ret = 0;
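+ /* val was page-aligned by call_memparse(), so this is exact */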
+ unsigned long pages = (unsigned long)(val >> PAGE_SHIFT);
- while (res_counter_set_limit(&memcg->res, val)) {
+ while (mem_counter_set_pages_limit(memcg, pages)) {
if (signal_pending(current)) {
ret = -EINTR;
break;
@@ -882,7 +969,7 @@ static int mem_cgroup_force_empty(struct
* active_list <-> inactive_list while we don't take a lock.
* So, we have to do loop here until all lists are empty.
*/
- while (mem->res.usage > 0) {
+ while (mem->res.pages > 0) {
if (atomic_read(&mem->css.cgroup->count) > 0)
goto out;
for_each_node_state(node, N_POSSIBLE)
@@ -902,13 +989,44 @@ out:
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
- return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
- cft->private);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long ret;
+
+ switch (cft->private) {
+ case MEMCG_FILE_TYPE_PAGE_USAGE:
+ ret = (u64)memcg->res.pages << PAGE_SHIFT;
+ break;
+ case MEMCG_FILE_TYPE_MAX_USAGE:
+ ret = (u64)memcg->res.max_usage << PAGE_SHIFT;
+ break;
+ case MEMCG_FILE_TYPE_PAGE_LIMIT:
+ ret = (u64)memcg->res.pages_limit << PAGE_SHIFT;
+ break;
+ case MEMCG_FILE_TYPE_FAILCNT:
+ /* failcnt is a plain event counter, not bytes */
+ ret = memcg->res.failcnt;
+ break;
+ default:
+ BUG();
+ }
+ return ret;
}
+
/*
* The user of this function is...
- * RES_LIMIT.
+ * MEMCG_FILE_TYPE_PAGE_LIMIT.
*/
+
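+/*
+ * Parse a size string with an optional K/M/G suffix via memparse() and
+ * round the result up to a page boundary: e.g. "4M" -> 4194304 bytes,
+ * i.e. 1024 pages with 4KB pages. Returns -EINVAL on trailing garbage.
+ */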
+static int call_memparse(const char *buf, unsigned long long *val)
+{
+ char *end;
+
+ *val = memparse((char *)buf, &end);
+ if (*end != '\0')
+ return -EINVAL;
+ /* not PAGE_ALIGN(): its unsigned long mask would truncate the
+ * upper bits of a 64-bit value on 32-bit; align by hand */
+ *val = (*val + PAGE_SIZE - 1) & ~((unsigned long long)PAGE_SIZE - 1);
+ return 0;
+}
+
static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
const char *buffer)
{
@@ -917,9 +1035,9 @@ static int mem_cgroup_write(struct cgrou
int ret;
switch (cft->private) {
- case RES_LIMIT:
+ case MEMCG_FILE_TYPE_PAGE_LIMIT:
- /* This function does all necessary parse...reuse it */
+ /* call_memparse() does all the necessary parsing */
- ret = res_counter_memparse_write_strategy(buffer, &val);
+ ret = call_memparse(buffer, &val);
if (!ret)
ret = mem_cgroup_resize_limit(memcg, val);
break;
@@ -936,11 +1054,11 @@ static int mem_cgroup_reset(struct cgrou
mem = mem_cgroup_from_cont(cont);
switch (event) {
- case RES_MAX_USAGE:
- res_counter_reset_max(&mem->res);
+ case MEMCG_FILE_TYPE_MAX_USAGE:
+ mem->res.max_usage = 0;
break;
- case RES_FAILCNT:
- res_counter_reset_failcnt(&mem->res);
+ case MEMCG_FILE_TYPE_FAILCNT:
+ mem->res.failcnt = 0;
break;
}
return 0;
@@ -1005,24 +1123,24 @@ static int mem_control_stat_show(struct
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
- .private = RES_USAGE,
+ .private = MEMCG_FILE_TYPE_PAGE_USAGE,
.read_u64 = mem_cgroup_read,
},
{
.name = "max_usage_in_bytes",
- .private = RES_MAX_USAGE,
+ .private = MEMCG_FILE_TYPE_MAX_USAGE,
.trigger = mem_cgroup_reset,
.read_u64 = mem_cgroup_read,
},
{
.name = "limit_in_bytes",
- .private = RES_LIMIT,
+ .private = MEMCG_FILE_TYPE_PAGE_LIMIT,
.write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
.name = "failcnt",
- .private = RES_FAILCNT,
+ .private = MEMCG_FILE_TYPE_FAILCNT,
.trigger = mem_cgroup_reset,
.read_u64 = mem_cgroup_read,
},
@@ -1111,7 +1229,7 @@ mem_cgroup_create(struct cgroup_subsys *
return ERR_PTR(-ENOMEM);
}
- res_counter_init(&mem->res);
+ mem_counter_init(mem);
for_each_node_state(node, N_POSSIBLE)
if (alloc_mem_cgroup_per_zone_info(mem, node))
--