lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:	Wed, 10 Sep 2008 15:25:51 -0700
From:	Paul Menage <menage@...gle.com>
To:	Andrew Morton <akpm@...ux-foundation.org>,
	Lai Jiangshan <laijs@...fujitsu.com>
CC:	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: [PATCH] Eliminate race in css_set refcounting

Eliminate race in css_set refcounting

This patch replaces the use of krefs in struct css_set with a plain
atomic_t, and ensures that the reference count of a css_set never hits
zero outside of a write_lock(&css_set_lock) critical section.

This prevents a race between find_css_set() and put_css_set*() where
the reference count can hit zero when the css_set object is still
accessible to readers of the hash table.

Signed-off-by: Paul Menage <menage@...gle.com>

---

This patch applies to mmotm-2008-09-08 on top of Lai Jiangshan's
previous patch. The previous patch narrowed the race, but still left a
hole whereby a new reader could take a reference on a to-be-deleted
css_set and then drop the reference immediately, leading to two
threads both seeing the refcount drop to zero (and hence wanting to
delete it).

The code sequence in __put_css_set() could be extracted into an
atomic_dec_and_write_lock() function if considered appropriate.

 include/linux/cgroup.h |    3 --
 kernel/cgroup.c        |   51 ++++++++++++++-----------------------------------
 kernel/cgroup_debug.c  |    4 +--
 3 files changed, 18 insertions(+), 40 deletions(-)

Index: css-mmotm-20080908/include/linux/cgroup.h
===================================================================
--- css-mmotm-20080908.orig/include/linux/cgroup.h
+++ css-mmotm-20080908/include/linux/cgroup.h
@@ -9,7 +9,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
@@ -159,7 +158,7 @@ struct cgroup {
 struct css_set {
 
 	/* Reference count */
-	struct kref ref;
+	atomic_t refcount;
 
 	/*
 	 * List running through all cgroup groups in the same hash
Index: css-mmotm-20080908/kernel/cgroup.c
===================================================================
--- css-mmotm-20080908.orig/kernel/cgroup.c
+++ css-mmotm-20080908/kernel/cgroup.c
@@ -252,14 +252,18 @@ static void unlink_css_set(struct css_se
 	}
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
 	write_lock(&css_set_lock);
-	if (atomic_read(&k->refcount) > 0) {
-		/* See find_css_set()'s read_lock()ed section */
+	if (!atomic_dec_and_test(&cg->refcount)) {
 		write_unlock(&css_set_lock);
 		return;
 	}
@@ -280,32 +284,22 @@ static void __release_css_set(struct kre
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -414,20 +408,6 @@ static struct css_set *find_css_set(
 	 * the desired set */
 	read_lock(&css_set_lock);
 	res = find_existing_css_set(oldcg, cgrp, template);
-	/*
-	 * put_css_set[_taskexit]() may race with find_css_set(), in that
-	 * find_css_set() got the css_set after put_css_set() had released it.
-	 *
-	 * We should put the whole put_css_set[_taskexit]() into css_set_lock's
-	 * write_lock critical setion to avoid this race. But it will increase
-	 * overhead for do_exit().
-	 *
-	 * So we do not avoid this race but put it under control:
-	 * __release_css_set() will re-check the refcount
-	 * with css_set_lock held.
-	 *
-	 * This race may trigger the warning in kref_get().
-	 */
 	if (res)
 		get_css_set(res);
 	read_unlock(&css_set_lock);
@@ -445,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -1754,7 +1734,7 @@ int cgroup_task_count(const struct cgrou
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -2572,8 +2552,7 @@ static void __init cgroup_init_subsys(st
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
Index: css-mmotm-20080908/kernel/cgroup_debug.c
===================================================================
--- css-mmotm-20080908.orig/kernel/cgroup_debug.c
+++ css-mmotm-20080908/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&current->cgroups->ref.refcount);
+	count = atomic_read(&current->cgroups->refcount);
 	rcu_read_unlock();
 	return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] =  {
 	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
-	}
+	},
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ