lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070514094114.GH4139@in.ibm.com>
Date:	Mon, 14 May 2007 15:11:14 +0530
From:	Bharata B Rao <bharata@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	linux-fsdevel@...r.kernel.org, Jan Blunck <j.blunck@...harburg.de>
Subject: [RFC][PATCH  6/14] Union-mount dentry reference counting

From: Jan Blunck <j.blunck@...harburg.de>
Subject: Union-mount dentry reference counting

dget() is modified to walk the union stack, taking a reference on every
dentry that is part of the union stack. This is necessary to ensure that
parts of the union stack don't go away from under us. Since dget() takes a
mutex for walking the stack, dget() can now sleep.

dput() also walks the union stack and releases the references to all the
dentries that are part of the union.

Since dget() can now sleep, make sure that dget() doesn't go to sleep with
any spinlocks held while it tries to get the mutex.

Signed-off-by: Jan Blunck <j.blunck@...harburg.de>
Signed-off-by: Bharata B Rao <bharata@...ux.vnet.ibm.com>
---
 fs/dcache.c                  |   43 ++++++-
 fs/dnotify.c                 |    5 
 fs/inotify.c                 |    8 +
 fs/namei.c                   |   42 +++++--
 fs/namespace.c               |   12 +-
 fs/proc/base.c               |   17 ++
 fs/union.c                   |  248 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h       |  110 ++++++++++++++++++-
 include/linux/dcache_union.h |   65 +++++++++++
 kernel/auditsc.c             |    4 
 kernel/cpuset.c              |    4 
 kernel/fork.c                |   10 +
 net/unix/af_unix.c           |    5 
 13 files changed, 537 insertions(+), 36 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -171,8 +171,7 @@ static struct dentry *d_kill(struct dent
  *
  * no dcache lock, please.
  */
-
-void dput(struct dentry *dentry)
+void __dput_single(struct dentry *dentry)
 {
 	if (!dentry)
 		return;
@@ -190,6 +189,13 @@ repeat:
 		return;
 	}
 
+	if (!__dput_single_destroy_union(dentry)) {
+		atomic_inc(&dentry->d_count);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&dcache_lock);
+		goto repeat;
+	}
+
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
 	 */
@@ -224,6 +230,14 @@ kill_it:
 		goto repeat;
 }
 
+void dput(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+
+	dput_common(dentry);
+}
+
 /**
  * d_invalidate - invalidate a dentry
  * @dentry: dentry to invalidate
@@ -285,6 +299,15 @@ int d_invalidate(struct dentry * dentry)
 
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
+	/*
+	 * TODO: We come here with dcache_lock held and can't
+	 * afford to sleep now to acquire the union_lock. We should
+	 * change all the callers to acquire union_lock first using
+	 * the union_lock_spinlock() helper. Until that is done,
+	 * BUG() here.
+	 */
+	BUG_ON(IS_UNION(dentry));
+
 	atomic_inc(&dentry->d_count);
 	if (!list_empty(&dentry->d_lru)) {
 		dentry_stat.nr_unused--;
@@ -392,7 +415,7 @@ static void prune_one_dentry(struct dent
 	__d_drop(dentry);
 	dentry = d_kill(dentry);
 	if (!prune_parents) {
-		dput(dentry);
+		__dput_single(dentry);
 		spin_lock(&dcache_lock);
 		return;
 	}
@@ -947,7 +970,7 @@ struct dentry *d_alloc(struct dentry * p
 	INIT_LIST_HEAD(&dentry->d_alias);
 
 	if (parent) {
-		dentry->d_parent = dget(parent);
+		dentry->d_parent = __dget_single(parent);
 		dentry->d_sb = parent->d_sb;
 	} else {
 		INIT_LIST_HEAD(&dentry->d_u.d_child);
@@ -1879,8 +1902,10 @@ char * d_path(struct dentry *dentry, str
 		return dentry->d_op->d_dname(dentry, buf, buflen);
 
 	read_lock(&current->fs->lock);
+	union_lock_readlock(current->fs->root, &current->fs->lock);
 	rootmnt = mntget(current->fs->rootmnt);
-	root = dget(current->fs->root);
+	root = __dget(current->fs->root);
+	union_unlock(current->fs->root);
 	read_unlock(&current->fs->lock);
 	spin_lock(&dcache_lock);
 	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
@@ -1940,10 +1965,14 @@ asmlinkage long sys_getcwd(char __user *
 		return -ENOMEM;
 
 	read_lock(&current->fs->lock);
+	union_lock_fs(current->fs);
 	pwdmnt = mntget(current->fs->pwdmnt);
-	pwd = dget(current->fs->pwd);
+	pwd = __dget(current->fs->pwd);
 	rootmnt = mntget(current->fs->rootmnt);
-	root = dget(current->fs->root);
+	root = __dget(current->fs->root);
+	union_unlock(current->fs->pwd);
+	union_unlock(current->fs->altroot);
+	union_unlock(current->fs->root);
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -161,13 +161,16 @@ void dnotify_parent(struct dentry *dentr
 		return;
 
 	spin_lock(&dentry->d_lock);
+	union_lock_spinlock(dentry->d_parent, &dentry->d_lock);
 	parent = dentry->d_parent;
 	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
+		__dget(parent);
+		union_unlock(parent);
 		spin_unlock(&dentry->d_lock);
 		__inode_dir_notify(parent->d_inode, event);
 		dput(parent);
 	} else {
+		union_unlock(parent);
 		spin_unlock(&dentry->d_lock);
 	}
 }
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -325,17 +325,21 @@ void inotify_dentry_parent_queue_event(s
 		return;
 
 	spin_lock(&dentry->d_lock);
+	union_lock_spinlock(dentry->d_parent, &dentry->d_lock);
 	parent = dentry->d_parent;
 	inode = parent->d_inode;
 
 	if (inotify_inode_watched(inode)) {
-		dget(parent);
+		__dget(parent);
+		union_unlock(parent);
 		spin_unlock(&dentry->d_lock);
 		inotify_inode_queue_event(inode, mask, cookie, name,
 					  dentry->d_inode);
 		dput(parent);
-	} else
+	} else {
+		union_unlock(parent);
 		spin_unlock(&dentry->d_lock);
+	}
 }
 EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
 
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -522,16 +522,23 @@ walk_init_root(const char *name, struct 
 	struct fs_struct *fs = current->fs;
 
 	read_lock(&fs->lock);
+	union_lock_fs(fs);
 	if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 		nd->mnt = mntget(fs->altrootmnt);
-		nd->dentry = dget(fs->altroot);
+		nd->dentry = __dget(fs->altroot);
+		union_unlock(fs->pwd);
+		union_unlock(fs->altroot);
+		union_unlock(fs->root);
 		read_unlock(&fs->lock);
 		if (__emul_lookup_dentry(name,nd))
 			return 0;
 		read_lock(&fs->lock);
 	}
 	nd->mnt = mntget(fs->rootmnt);
-	nd->dentry = dget(fs->root);
+	nd->dentry = __dget(fs->root);
+	union_unlock(fs->pwd);
+	union_unlock(fs->altroot);
+	union_unlock(fs->root);
 	read_unlock(&fs->lock);
 	return 1;
 }
@@ -654,13 +661,16 @@ int follow_up(struct vfsmount **mnt, str
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 	spin_lock(&vfsmount_lock);
+	union_lock_spinlock((*mnt)->mnt_mountpoint, &vfsmount_lock);
 	parent=(*mnt)->mnt_parent;
 	if (parent == *mnt) {
+		union_unlock((*mnt)->mnt_mountpoint);
 		spin_unlock(&vfsmount_lock);
 		return 0;
 	}
 	mntget(parent);
-	mountpoint=dget((*mnt)->mnt_mountpoint);
+	mountpoint=__dget((*mnt)->mnt_mountpoint);
+	union_unlock((*mnt)->mnt_mountpoint);
 	spin_unlock(&vfsmount_lock);
 	dput(*dentry);
 	*dentry = mountpoint;
@@ -736,21 +746,27 @@ static __always_inline void follow_dotdo
 		}
                 read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
+		union_lock_spinlock(nd->dentry->d_parent, &dcache_lock);
 		if (nd->dentry != nd->mnt->mnt_root) {
-			nd->dentry = dget(nd->dentry->d_parent);
+			nd->dentry = __dget(nd->dentry->d_parent);
+			union_unlock(nd->dentry->d_parent);
 			spin_unlock(&dcache_lock);
 			dput(old);
 			break;
 		}
+		union_unlock(nd->dentry->d_parent);
 		spin_unlock(&dcache_lock);
 		spin_lock(&vfsmount_lock);
+		union_lock_spinlock(nd->mnt->mnt_mountpoint, &vfsmount_lock);
 		parent = nd->mnt->mnt_parent;
 		if (parent == nd->mnt) {
+			union_unlock(nd->mnt->mnt_mountpoint);
 			spin_unlock(&vfsmount_lock);
 			break;
 		}
 		mntget(parent);
-		nd->dentry = dget(nd->mnt->mnt_mountpoint);
+		nd->dentry = __dget(nd->mnt->mnt_mountpoint);
+		union_unlock(nd->mnt->mnt_mountpoint);
 		spin_unlock(&vfsmount_lock);
 		dput(old);
 		mntput(nd->mnt);
@@ -1050,8 +1066,10 @@ static int __emul_lookup_dentry(const ch
 		 */
 		nd->last_type = LAST_ROOT;
 		read_lock(&fs->lock);
+		union_lock_readlock(fs->root, &fs->lock);
 		nd->mnt = mntget(fs->rootmnt);
-		nd->dentry = dget(fs->root);
+		nd->dentry = __dget(fs->root);
+		union_unlock(fs->root);
 		read_unlock(&fs->lock);
 		if (path_walk(name, nd) == 0) {
 			if (nd->dentry->d_inode) {
@@ -1114,20 +1132,26 @@ static int fastcall do_path_lookup(int d
 	if (*name=='/') {
 		read_lock(&fs->lock);
 		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+			union_lock_readlock(fs->altroot, &fs->lock);
 			nd->mnt = mntget(fs->altrootmnt);
-			nd->dentry = dget(fs->altroot);
+			nd->dentry = __dget(fs->altroot);
+			union_unlock(fs->altroot);
 			read_unlock(&fs->lock);
 			if (__emul_lookup_dentry(name,nd))
 				goto out; /* found in altroot */
 			read_lock(&fs->lock);
 		}
+		union_lock_readlock(fs->root, &fs->lock);
 		nd->mnt = mntget(fs->rootmnt);
-		nd->dentry = dget(fs->root);
+		nd->dentry = __dget(fs->root);
+		union_unlock(fs->root);
 		read_unlock(&fs->lock);
 	} else if (dfd == AT_FDCWD) {
 		read_lock(&fs->lock);
+		union_lock_readlock(fs->pwd, &fs->lock);
 		nd->mnt = mntget(fs->pwdmnt);
-		nd->dentry = dget(fs->pwd);
+		nd->dentry = __dget(fs->pwd);
+		union_unlock(fs->pwd);
 		read_unlock(&fs->lock);
 	} else {
 		struct dentry *dentry;
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1743,12 +1743,14 @@ void set_fs_root(struct fs_struct *fs, s
 {
 	struct dentry *old_root;
 	struct vfsmount *old_rootmnt;
+	union_lock(dentry);
 	write_lock(&fs->lock);
 	old_root = fs->root;
 	old_rootmnt = fs->rootmnt;
 	fs->rootmnt = mntget(mnt);
-	fs->root = dget(dentry);
+	fs->root = __dget(dentry);
 	write_unlock(&fs->lock);
+	union_unlock(dentry);
 	if (old_root) {
 		dput(old_root);
 		mntput(old_rootmnt);
@@ -1765,12 +1767,14 @@ void set_fs_pwd(struct fs_struct *fs, st
 	struct dentry *old_pwd;
 	struct vfsmount *old_pwdmnt;
 
+	union_lock(dentry);
 	write_lock(&fs->lock);
 	old_pwd = fs->pwd;
 	old_pwdmnt = fs->pwdmnt;
 	fs->pwdmnt = mntget(mnt);
-	fs->pwd = dget(dentry);
+	fs->pwd = __dget(dentry);
 	write_unlock(&fs->lock);
+	union_unlock(dentry);
 
 	if (old_pwd) {
 		dput(old_pwd);
@@ -1859,8 +1863,10 @@ asmlinkage long sys_pivot_root(const cha
 	}
 
 	read_lock(&current->fs->lock);
+	union_lock_readlock(current->fs->root, &current->fs->lock);
 	user_nd.mnt = mntget(current->fs->rootmnt);
-	user_nd.dentry = dget(current->fs->root);
+	user_nd.dentry = __dget(current->fs->root);
+	union_unlock(current->fs->root);
 	read_unlock(&current->fs->lock);
 	down_write(&namespace_sem);
 	mutex_lock(&old_nd.dentry->d_inode->i_mutex);
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -162,8 +162,10 @@ static int proc_cwd_link(struct inode *i
 	}
 	if (fs) {
 		read_lock(&fs->lock);
+		union_lock_readlock(fs->pwd, &fs->lock);
 		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		*dentry = __dget(fs->pwd);
+		union_unlock(fs->pwd);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
@@ -183,8 +185,10 @@ static int proc_root_link(struct inode *
 	}
 	if (fs) {
 		read_lock(&fs->lock);
+		union_lock_readlock(fs->root, &fs->lock);
 		*mnt = mntget(fs->rootmnt);
-		*dentry = dget(fs->root);
+		*dentry = __dget(fs->root);
+		union_unlock(fs->root);
 		read_unlock(&fs->lock);
 		result = 0;
 		put_fs_struct(fs);
@@ -1199,19 +1203,26 @@ static int proc_fd_info(struct inode *in
 		 * We are not taking a ref to the file structure, so we must
 		 * hold ->file_lock.
 		 */
+repeat:
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
+			if (!union_trylock(file->f_path.dentry)) {
+				spin_unlock(&files->file_lock);
+				cpu_relax();
+				goto repeat;
+			}
 			if (mnt)
 				*mnt = mntget(file->f_path.mnt);
 			if (dentry)
-				*dentry = dget(file->f_path.dentry);
+				*dentry = __dget(file->f_path.dentry);
 			if (info)
 				snprintf(info, PROC_FDINFO_MAX,
 					 "pos:\t%lli\n"
 					 "flags:\t0%o\n",
 					 (long long) file->f_pos,
 					 file->f_flags);
+			union_unlock(file->f_path.dentry);
 			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
--- a/fs/union.c
+++ b/fs/union.c
@@ -11,6 +11,9 @@
  */
 
 #include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/module.h>
+#include <linux/mount.h>
 
 struct union_info * union_alloc(void)
 {
@@ -51,3 +54,248 @@ void union_put(struct union_info *info)
 
 	return;
 }
+
+void __union_check(struct dentry *dentry)
+{
+	if (likely(!(dentry->d_topmost || dentry->d_overlaid))) {
+		if (unlikely(dentry->d_union)) {
+			printk(KERN_ERR "%s: \"%s\" stale union reference\n" \
+			       "\tdentry=%p, inode=%p, count=%d, u_count=%d\n",
+			       __FUNCTION__,
+			       dentry->d_name.name,
+			       dentry,
+			       dentry->d_inode,
+			       atomic_read(&dentry->d_count),
+			       atomic_read(&dentry->d_union->u_count));
+			dump_stack();
+		}
+		return;
+	}
+
+	BUG_ON(!dentry->d_union);
+
+	if ((dentry == dentry->d_topmost) || (dentry == dentry->d_overlaid)) {
+		printk(KERN_ERR "%s: \"%s\" loop in union stack\n",
+		       __FUNCTION__, dentry->d_name.name);
+		BUG();
+	}
+
+	if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
+		printk(KERN_ERR "%s: \"%s\" isn't a directory!\n",
+		       __FUNCTION__, dentry->d_name.name);
+		BUG();
+	}
+
+	if (dentry->d_topmost && !dentry->d_topmost->d_inode) {
+		printk(KERN_ERR "%s: \"%s\" has a negative topmost dentry!\n",
+		       __FUNCTION__, dentry->d_name.name);
+		BUG();
+	}
+
+	if (!dentry->d_inode && !dentry->d_topmost) {
+		printk(KERN_ERR "%s: \"%s\" is a negative topmost dentry!\n",
+		       __FUNCTION__, dentry->d_name.name);
+		BUG();
+	}
+}
+EXPORT_SYMBOL_GPL(__union_check);
+
+/*
+ * Check if the given @parent dentry is really a parent of @dentry
+ */
+static int union_is_parent(struct dentry *dentry, struct dentry *parent)
+{
+	struct dentry *tmp = dentry;
+
+	if (parent->d_sb != dentry->d_sb) {
+		UM_DEBUG("%s and %s have different superblocks\n",
+			 dentry->d_name.name, parent->d_name.name);
+		return 0;
+	}
+
+	do {
+		if (tmp == parent)
+			return 1;
+	} while (tmp != tmp->d_parent && (tmp = tmp->d_parent));
+
+	return 0;
+}
+
+/*
+ * Check if the @dentry is part of a union
+ */
+int union_is_member(struct dentry *dentry, struct vfsmount *mnt)
+{
+	struct list_head *tmp;
+	struct vfsmount *p, *m_tmp = mntget(mnt);
+	struct dentry *d_tmp = __dget(dentry);
+
+	UM_DEBUG_UID("dentry=%s\n", dentry->d_name.name);
+
+	do {
+		UM_DEBUG_UID("device=%s\n", mnt->mnt_devname);
+		list_for_each(tmp, &m_tmp->mnt_mounts) {
+			p = list_entry(tmp, struct vfsmount, mnt_child);
+			UM_DEBUG_UID("child=%s\n", p->mnt_devname);
+			if (p->mnt_flags & MNT_UNION) {
+				UM_DEBUG_UID("is union=%s\n", p->mnt_devname);
+				if (union_is_parent(d_tmp, p->mnt_mountpoint)) {
+					__dput(d_tmp);
+					mntput(m_tmp);
+					return 1;
+				}
+			}
+		}
+
+		__dput(d_tmp);
+		d_tmp = __dget(m_tmp->mnt_mountpoint);
+		p = mntget(m_tmp->mnt_parent);
+		mntput(m_tmp);
+		m_tmp = p;
+	} while (m_tmp != m_tmp->mnt_parent);
+
+	__dput(d_tmp);
+	mntput(m_tmp);
+	return 0;
+}
+
+int __destroy_union(struct dentry * dentry)
+{
+	struct dentry *next;
+	struct dentry *topmost;
+	struct union_info *uinfo;
+
+	if (!union_trylock(dentry))
+		return 0;
+
+	uinfo = union_get(dentry->d_union);
+
+	UM_DEBUG_DCACHE("destroying \"%s\" (%p) union stack %p\n",
+			dentry->d_name.name, dentry->d_inode, uinfo);
+
+	next = dentry->d_topmost ? dentry->d_topmost : dentry;
+	while (next) {
+		struct dentry *tmp = next;
+		next = next->d_overlaid;
+
+		UM_DEBUG_DCACHE("\"%s\", inode=%p, count=%d\n",
+				tmp->d_name.name, tmp->d_inode,
+				atomic_read(&tmp->d_count));
+		if (tmp != dentry)
+			spin_lock(&tmp->d_lock);
+		tmp->d_topmost = NULL;
+		tmp->d_overlaid = NULL;
+		union_put(tmp->d_union);
+		tmp->d_union = NULL;
+		if (tmp != dentry)
+			spin_unlock(&tmp->d_lock);
+		if (tmp == dentry)
+			goto rebuild_stack;
+	}
+
+	mutex_unlock(&uinfo->u_mutex);
+	union_put(uinfo);
+	return 1;
+
+rebuild_stack:
+	if (next) {
+		topmost = next;
+		UM_DEBUG_DCACHE("\"%s\", inode=%p, count=%d\n",
+				next->d_name.name, next->d_inode,
+				atomic_read(&next->d_count));
+		spin_lock(&next->d_lock);
+		next->d_topmost = NULL;
+		if (!next->d_overlaid) {
+			union_put(next->d_union);
+			next->d_union = NULL;
+		}
+		spin_unlock(&next->d_lock);
+		next = next->d_overlaid;
+	}
+
+	while (next) {
+		struct dentry *tmp = next;
+		next = next->d_overlaid;
+		UM_DEBUG_DCACHE("\"%s\", inode=%p, count=%d\n",
+				tmp->d_name.name, tmp->d_inode,
+				atomic_read(&tmp->d_count));
+		tmp->d_topmost = topmost;
+	}
+
+	mutex_unlock(&uinfo->u_mutex);
+	union_put(uinfo);
+	return 1;
+}
+
+static void __destroy_stack_part(struct dentry * first, struct dentry * last)
+{
+	struct dentry * next = first;
+
+	while (next) {
+		struct dentry * tmp = next;
+		next = next->d_overlaid;
+
+		spin_lock(&tmp->d_lock);
+		tmp->d_topmost = NULL;
+		tmp->d_overlaid = NULL;
+		union_put(tmp->d_union);
+		tmp->d_union = NULL;
+		spin_unlock(&tmp->d_lock);
+		if (tmp == last)
+			break;
+	}
+}
+
+/*
+ * This is the union-mount dput(). For union mount dentries it walks DOWN
+ * the union stack and puts every dentry in it. If the usage count of one
+ * of the dentries reaches zero, that dentry is removed from the stack.
+ */
+void __dput_union(struct dentry *dentry)
+{
+	struct dentry *topmost;		// the new topmost after dput()
+	struct dentry *next;
+
+	union_check(dentry);
+
+	if (dentry->d_topmost) {
+		UM_DEBUG_DCACHE("we are not the topmost dentry\n");
+		topmost = dentry->d_topmost;
+	} else
+		topmost = NULL;
+
+	next = dentry;
+	while (next) {
+		struct dentry *tmp = next;	// the dentry we dput now
+		next = next->d_overlaid;	// the dentry we dput next
+
+		UM_DEBUG_DCACHE("name=\"%s\", inode=%p, count=%d\n",
+				tmp->d_name.name, tmp->d_inode,
+				atomic_read(&tmp->d_count));
+
+		if (atomic_read(&tmp->d_count) < 2) {
+			__destroy_stack_part(topmost ? topmost : tmp, tmp);
+			topmost = NULL;
+		} else {
+			tmp->d_topmost = topmost;
+			if (!topmost)
+				topmost = tmp;
+		}
+
+		/* We are the last one using d_union */
+		spin_lock(&tmp->d_lock);
+		if (tmp->d_union
+		    && (atomic_read(&tmp->d_union->u_count) == 1)) {
+			BUG_ON(next);
+			tmp->d_overlaid = NULL;
+			tmp->d_topmost = NULL;
+			union_put(tmp->d_union);
+			tmp->d_union = NULL;
+		}
+		spin_unlock(&tmp->d_lock);
+
+		__dput_single(tmp);
+	}
+
+	return;
+}
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -322,7 +322,7 @@ extern char * d_path(struct dentry *, st
  *	and call dget_locked() instead of dget().
  */
  
-static inline struct dentry *dget(struct dentry *dentry)
+static inline struct dentry *__dget_single(struct dentry *dentry)
 {
 	if (dentry) {
 		BUG_ON(!atomic_read(&dentry->d_count));
@@ -333,6 +333,108 @@ static inline struct dentry *dget(struct
 
 extern struct dentry * dget_locked(struct dentry *);
 
+/*
+ * Reference counting for union mounts
+ */
+#include <linux/dcache_union.h>
+extern void __dput_single(struct dentry *);
+extern void dput(struct dentry *);
+
+#ifdef CONFIG_UNION_MOUNT
+/*
+ * Called with dentry's union lock held
+ */
+static inline struct dentry * __dget(struct dentry *dentry)
+{
+	if (unlikely(IS_UNION(dentry)))
+		return __dget_union(dentry);
+	else
+		return __dget_single(dentry);
+}
+
+static inline struct dentry * dget(struct dentry *dentry)
+{
+	if (!dentry)
+		return dentry;
+
+	/*
+	 * Yes, dget() can sleep now, if the union struct isn't yet read
+	 * in completely. This is symmetric to dput() which can sleep too.
+	 */
+	might_sleep();
+
+	union_lock(dentry);
+	__dget(dentry);
+	union_unlock(dentry);
+	return dentry;
+}
+
+/*
+ * Called with dentry's union lock held
+ */
+static inline void __dput(struct dentry *dentry)
+{
+	if (unlikely(IS_UNION(dentry)))
+		__dput_union(dentry);
+	else
+		__dput_single(dentry);
+}
+
+#else /* CONFIG_UNION_MOUNT */
+
+/* Allocation counts.. */
+
+/**
+ *	dget, dget_locked	-	get a reference to a dentry
+ *	@dentry: dentry to get a reference to
+ *
+ *	Given a dentry or %NULL pointer increment the reference count
+ *	if appropriate and return the dentry. A dentry will not be
+ *	destroyed when it has references. dget() should never be
+ *	called for dentries with zero reference counter. For these cases
+ *	(preferably none, functions in dcache.c are sufficient for normal
+ *	needs and they take necessary precautions) you should hold dcache_lock
+ *	and call dget_locked() instead of dget().
+ */
+
+static inline struct dentry *dget(struct dentry *dentry)
+{
+	if (dentry) {
+		BUG_ON(!atomic_read(&dentry->d_count));
+		atomic_inc(&dentry->d_count);
+	}
+	return dentry;
+}
+
+#define __dget(dentry)	dget(dentry)
+#define __dput(dentry)	dput(dentry)
+
+#endif /* CONFIG_UNION_MOUNT */
+
+static inline void dput_common(struct dentry *dentry)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(IS_UNION(dentry))) {
+		struct union_info *uinfo;
+
+		/*
+		 * Grab a reference to the union_info which might get detached
+		 * from the dentries in __dput_union().
+		 */
+		uinfo = union_lock_and_get(dentry);
+		__dput_union(dentry);
+		if (uinfo) {
+			if (atomic_read(&uinfo->u_count) == 1)
+				/* We are the last user of this union_info */
+				union_release(uinfo);
+			else
+				union_put_and_unlock(uinfo);
+		}
+	} else
+#endif
+	return __dput_single(dentry);
+}
+
 /**
  *	d_unhashed -	is dentry hashed
  *	@dentry: entry to check
@@ -350,13 +452,13 @@ static inline struct dentry *dget_parent
 	struct dentry *ret;
 
 	spin_lock(&dentry->d_lock);
-	ret = dget(dentry->d_parent);
+	union_lock_spinlock(dentry->d_parent, &dentry->d_lock);
+	ret = __dget(dentry->d_parent);
+	union_unlock(dentry->d_parent);
 	spin_unlock(&dentry->d_lock);
 	return ret;
 }
 
-extern void dput(struct dentry *);
-
 static inline int d_mountpoint(struct dentry *dentry)
 {
 	return dentry->d_mounted;
--- a/include/linux/dcache_union.h
+++ b/include/linux/dcache_union.h
@@ -39,6 +39,17 @@ extern struct union_info *union_alloc(vo
 extern struct union_info *union_get(struct union_info *);
 extern void union_put(struct union_info *);
 
+
+extern void __union_check(struct dentry *);
+
+static inline void union_check(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+	if (unlikely(dentry->d_union))
+		__union_check(dentry);
+}
+
 /*
  * These are the functions for locking a dentry's union. When one
  * want to acquire a denties union lock, use:
@@ -74,6 +85,7 @@ static inline void union_lock(struct den
 		UM_DEBUG_LOCK("\"%s\" locking %p (count=%d)\n",
 			      dentry->d_name.name, ui,
 			      atomic_read(&ui->u_count));
+		union_check(dentry);
 		__union_lock(dentry->d_union);
 	}
 }
@@ -92,6 +104,7 @@ static inline void union_unlock(struct d
 		UM_DEBUG_LOCK("\"%s\" unlocking %p (count=%d)\n",
 			      dentry->d_name.name, ui,
 			      atomic_read(&ui->u_count));
+		union_check(dentry);
 		__union_unlock(dentry->d_union);
 	}
 }
@@ -146,6 +159,7 @@ static inline int union_trylock(struct d
 		UM_DEBUG_LOCK("\"%s\" try locking %p (count=%d)\n",
 			      dentry->d_name.name, dentry->d_union,
 			      atomic_read(&dentry->d_union->u_count));
+		union_check(dentry);
 		BUG_ON(!atomic_read(&dentry->d_union->u_count));
 		locked = mutex_trylock(&dentry->d_union->u_mutex);
 		UM_DEBUG_LOCK("\"%s\" trylock %p %s\n", dentry->d_name.name,
@@ -227,10 +241,57 @@ static inline void union_lock_fs(struct 
 }
 
 #define IS_UNION(dentry) ((dentry)->d_overlaid || (dentry)->d_topmost || \
-				(dentry)->d_overlaid)
+			 (dentry)->d_union)
+
+/* dentry reference counting */
+static inline struct dentry * __dget_union(struct dentry *dentry)
+{
+	if (!dentry)
+		return dentry;
+
+	union_check(dentry);
+	__dget_single(dentry);
+
+	if (likely(!dentry->d_overlaid && !dentry->d_topmost))
+		return dentry;
+
+	if (dentry->d_topmost)
+		UM_DEBUG_DCACHE("we are not the topmost dentry\n");
+
+	UM_DEBUG_DCACHE("name=\"%s\", inode=%p, count=%d\n",
+			dentry->d_name.name, dentry->d_inode,
+			atomic_read(&dentry->d_count));
+
+	if (dentry->d_overlaid) {
+		struct dentry * tmp = dentry->d_overlaid;
+
+		while (tmp) {
+			__dget_single(tmp);
+			UM_DEBUG_DCACHE("name=\"%s\", inode=%p, count=%d\n",
+					tmp->d_name.name, tmp->d_inode,
+					atomic_read(&tmp->d_count));
+			tmp = tmp->d_overlaid;
+		}
+	}
+
+	return dentry;
+}
+
+extern void __dput_union(struct dentry *);
+extern int __destroy_union(struct dentry *dentry);
+
+static inline int __dput_single_destroy_union(struct dentry *dentry)
+{
+	if (!dentry->d_union)
+		return 1;
+
+	return __destroy_union(dentry);
+}
 
 #else /* CONFIG_UNION_MOUNT */
 
+#define union_check(dentry) do { /* empty */ } while (0)
+
 #define union_lock(dentry) do { /* empty */ } while (0)
 #define union_trylock(dentry) ({ (1); })
 #define union_unlock(dentry) do { /* empty */ } while (0)
@@ -243,6 +304,8 @@ static inline void union_lock_fs(struct 
 #define union_unlock_and_put(dentry) do { /* empty */ } while (0)
 #define union_release(x) do { BUG(); } while (0)
 
+#define __dput_single_destroy_union(x) ({ (1); })
+
 #endif	/* CONFIG_UNION_MOUNT */
 #endif	/* __KERNEL__ */
 #endif	/* __LINUX_DCACHE_UNION_H */
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1229,8 +1229,10 @@ void __audit_getname(const char *name)
 	++context->name_count;
 	if (!context->pwd) {
 		read_lock(&current->fs->lock);
-		context->pwd = dget(current->fs->pwd);
+		union_lock_readlock(current->fs->pwd, &current->fs->lock);
+		context->pwd = __dget(current->fs->pwd);
 		context->pwdmnt = mntget(current->fs->pwdmnt);
+		union_unlock(current->fs->pwd);
 		read_unlock(&current->fs->lock);
 	}
 		
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1908,9 +1908,11 @@ static int cpuset_rmdir(struct inode *un
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
 	list_del(&cs->sibling);	/* delete my sibling from parent->children */
+	union_lock(cs->dentry);
 	spin_lock(&cs->dentry->d_lock);
-	d = dget(cs->dentry);
+	d = __dget(cs->dentry);
 	cs->dentry = NULL;
+	union_unlock(d);
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -580,17 +580,21 @@ static inline struct fs_struct *__copy_f
 		rwlock_init(&fs->lock);
 		fs->umask = old->umask;
 		read_lock(&old->lock);
+		union_lock_fs(old);
 		fs->rootmnt = mntget(old->rootmnt);
-		fs->root = dget(old->root);
+		fs->root = __dget(old->root);
 		fs->pwdmnt = mntget(old->pwdmnt);
-		fs->pwd = dget(old->pwd);
+		fs->pwd = __dget(old->pwd);
 		if (old->altroot) {
 			fs->altrootmnt = mntget(old->altrootmnt);
-			fs->altroot = dget(old->altroot);
+			fs->altroot = __dget(old->altroot);
 		} else {
 			fs->altrootmnt = NULL;
 			fs->altroot = NULL;
 		}
+		union_unlock(old->pwd);
+		union_unlock(old->altroot);
+		union_unlock(old->root);
 		read_unlock(&old->lock);
 	}
 	return fs;
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1082,8 +1082,11 @@ restart:
 		newu->addr = otheru->addr;
 	}
 	if (otheru->dentry) {
-		newu->dentry	= dget(otheru->dentry);
+		/* Is this safe here? I don't know ... */
+		union_lock_spinlock(otheru->dentry, &otheru->lock);
+		newu->dentry	= __dget(otheru->dentry);
 		newu->mnt	= mntget(otheru->mnt);
+		union_unlock(otheru->dentry);
 	}
 
 	/* Set credentials */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ