[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.LSU.2.00.1201182155480.7862@eggly.anvils>
Date: Wed, 18 Jan 2012 22:05:12 -0800 (PST)
From: Hugh Dickins <hughd@...gle.com>
To: Tejun Heo <tj@...nel.org>
cc: Li Zefan <lizf@...fujitsu.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Manfred Spraul <manfred@...orfullife.com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
Johannes Weiner <hannes@...xchg.org>,
Ying Han <yinghan@...gle.com>,
Greg Thelen <gthelen@...gle.com>, cgroups@...r.kernel.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: [PATCH] memcg: restore ss->id_lock to spinlock, using RCU for next
Commit c1e2ee2dc436 "memcg: replace ss->id_lock with a rwlock" has
now been seen to cause the unfair behavior we should have expected
from converting a spinlock to an rwlock: softlockup in cgroup_mkdir(),
whose get_new_cssid() is waiting for the wlock, while there are 19
tasks using the rlock in css_get_next() to get on with their memcg
workload (in an artificial test, admittedly). Yet lib/idr.c was
made suitable for RCU way back, so the read side of the idr lookup
does not need to take ss->id_lock at all.
1. Revert that commit, restoring ss->id_lock to a spinlock.
2. Make one small adjustment to idr_get_next(): take the height from
the top layer (stable under RCU) instead of from the root (unprotected
by RCU), as idr_find() does.
3. Remove the lock and unlock around css_get_next()'s call to
idr_get_next(): the memcg iterators (the only users of css_get_next())
already take rcu_read_lock(), and the existing comment demands that;
add a WARN_ON_ONCE() to make sure of it.
Signed-off-by: Hugh Dickins <hughd@...gle.com>
---
include/linux/cgroup.h | 2 +-
kernel/cgroup.c | 19 +++++++++----------
lib/idr.c | 4 ++--
3 files changed, 12 insertions(+), 13 deletions(-)
--- 3.2.0+/include/linux/cgroup.h 2012-01-14 13:01:57.532007832 -0800
+++ linux/include/linux/cgroup.h 2012-01-18 21:21:45.695966602 -0800
@@ -535,7 +535,7 @@ struct cgroup_subsys {
struct list_head sibling;
/* used when use_id == true */
struct idr idr;
- rwlock_t id_lock;
+ spinlock_t id_lock;
/* should be defined only by modular subsystems */
struct module *module;
--- 3.2.0+/kernel/cgroup.c 2012-01-14 13:01:57.824007839 -0800
+++ linux/kernel/cgroup.c 2012-01-18 21:29:05.199958492 -0800
@@ -4939,9 +4939,9 @@ void free_css_id(struct cgroup_subsys *s
rcu_assign_pointer(id->css, NULL);
rcu_assign_pointer(css->id, NULL);
- write_lock(&ss->id_lock);
+ spin_lock(&ss->id_lock);
idr_remove(&ss->idr, id->id);
- write_unlock(&ss->id_lock);
+ spin_unlock(&ss->id_lock);
kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
@@ -4967,10 +4967,10 @@ static struct css_id *get_new_cssid(stru
error = -ENOMEM;
goto err_out;
}
- write_lock(&ss->id_lock);
+ spin_lock(&ss->id_lock);
/* Don't use 0. allocates an ID of 1-65535 */
error = idr_get_new_above(&ss->idr, newid, 1, &myid);
- write_unlock(&ss->id_lock);
+ spin_unlock(&ss->id_lock);
/* Returns error when there are no free spaces for new ID.*/
if (error) {
@@ -4985,9 +4985,9 @@ static struct css_id *get_new_cssid(stru
return newid;
remove_idr:
error = -ENOSPC;
- write_lock(&ss->id_lock);
+ spin_lock(&ss->id_lock);
idr_remove(&ss->idr, myid);
- write_unlock(&ss->id_lock);
+ spin_unlock(&ss->id_lock);
err_out:
kfree(newid);
return ERR_PTR(error);
@@ -4999,7 +4999,7 @@ static int __init_or_module cgroup_init_
{
struct css_id *newid;
- rwlock_init(&ss->id_lock);
+ spin_lock_init(&ss->id_lock);
idr_init(&ss->idr);
newid = get_new_cssid(ss, 0);
@@ -5087,6 +5087,8 @@ css_get_next(struct cgroup_subsys *ss, i
return NULL;
BUG_ON(!ss->use_id);
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
/* fill start point for scan */
tmpid = id;
while (1) {
@@ -5094,10 +5096,7 @@ css_get_next(struct cgroup_subsys *ss, i
* scan next entry from bitmap(tree), tmpid is updated after
* idr_get_next().
*/
- read_lock(&ss->id_lock);
tmp = idr_get_next(&ss->idr, &tmpid);
- read_unlock(&ss->id_lock);
-
if (!tmp)
break;
if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
--- 3.2.0+/lib/idr.c 2012-01-04 15:55:44.000000000 -0800
+++ linux/lib/idr.c 2012-01-18 21:25:36.947963342 -0800
@@ -605,11 +605,11 @@ void *idr_get_next(struct idr *idp, int
int n, max;
/* find first ent */
- n = idp->layers * IDR_BITS;
- max = 1 << n;
p = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
+ n = (p->layer + 1) * IDR_BITS;
+ max = 1 << n;
while (id < max) {
while (n > 0 && p) {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists