linux-kernel - Re: [PATCH 0/5] hybrid union filesystem prototype

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100901143333.0b9db01a@notabene>
Date:	Wed, 1 Sep 2010 14:33:33 +1000
From:	Neil Brown <neilb@...e.de>
To:	Miklos Szeredi <miklos@...redi.hu>
Cc:	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	vaurora@...hat.com, viro@...iv.linux.org.uk, jblunck@...e.de,
	hch@...radead.org
Subject: Re: [PATCH 0/5] hybrid union filesystem prototype

On Mon, 30 Aug 2010 21:40:27 +1000
Neil Brown <neilb@...e.de> wrote:

> > > > I think it's best to leave that stuff until someone actually cares.
> > > > The "people might find it useful" argument is not strong enough to put
> > > > nontrivial effort into thinking about all the weird corner cases.
> > > 
> > > My thought on this is that in order to describe the behaviour of the
> > > filesystem accurately you need to think about all the weird corner cases.
> > > 
> > > Then it becomes a question of: is it easier to document the weird behaviour,
> > > or change the code so the documentation will be easier to understand?
> > > Some cases will go one way, some the other.
> > > 
> > > But as you suggest this isn't urgent.
> > 
> > You didn't mention one possibility: add limitations that prevent the
> > weird corner cases arising.  I believe this is the simplest option.
> 
> Val has been following that approach and asking if it is possible to make an
> NFS filesystem really-truly read-only. i.e. no changes.
> I don't believe it is.
> 
> But I won't pursue this further without patches.
> 

And here is a patch.  It isn't really complete, but I have done enough for
today.  It at least shows what I am trying to do.

To see a clear indication of the effect, try the following script before and
after the patch..

-----------------------
#!/bin/sh

# L = lower U = upper O = overlay

umount /tmp/O
rm -rf /tmp/[LUO]

mkdir /tmp/{L,U,O}

for i in 1 2 3; do > /tmp/L/$i ; done

mount -t union -o upperdir=/tmp/U,lowerdir=/tmp/L pointlessword /tmp/O

ls -l /tmp/O/foo
ls -l /tmp/O
> /tmp/L/foo
ls -l /tmp/O
-------------------------


The last 'ls -l' report a non-existent 'foo' without the patch.  With the
patch it more correctly doesn't report anything at all about foo.


No patch change-log yet because I'm not suggesting it be included, just
reviewed.

NeilBrown

diff --git a/fs/union/union.c b/fs/union/union.c
index e19fe62..434d152 100644
--- a/fs/union/union.c
+++ b/fs/union/union.c
@@ -392,9 +392,160 @@ static void union_dentry_iput(struct dentry *dentry, struct inode *inode)
 	iput(inode);
 }
 
+static int union_still_valid(struct dentry *dentry,
+			     struct dentry *parent, struct dentry *child)
+{
+	/* dentry is in the union filesystem
+	 * child is the corresponding dentry in the upper to lower layer
+	 * parent is the corresponding dentry of dentry's parent in the same layer
+	 *
+	 * If child is NULL, the parent must be NULL or negative.
+	 * Otherwise child must be hashed, have parent as the d_parent, and
+	 * have the same name as dentry
+	 *
+	 */
+	struct qstr *qstr;
+	int rv;
+
+	if (child == NULL)
+		return (parent == NULL || parent->d_inode == NULL);
+
+	if (child->d_parent != parent)
+		return 0;
+	if (d_unhashed(child))
+		return 0;
+
+	/* Unfortunately we need d_lock to compare names */
+	spin_lock(&dentry->d_lock);
+	spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+	qstr = &child->d_name;
+	if (parent->d_op && parent->d_op->d_compare)
+		rv = ! (parent->d_op->d_compare(parent, qstr, &dentry->d_name));
+	else
+		rv =  (qstr->len == dentry->d_name.len &&
+		       memcmp(qstr->name, dentry->d_name.name, qstr->len) == 0);
+
+	spin_unlock(&child->d_lock);
+	spin_unlock(&dentry->d_lock);
+	return rv;
+}
+
+static struct dentry *union_lookup_real(struct dentry *dir, struct qstr *name);
+static struct inode *union_new_inode(struct super_block *sb, umode_t mode);
+
+static struct union_entry do_union_lookup(struct inode *dir, struct dentry *dentry,
+					   struct nameidata *nd)
+{
+	struct union_entry *pue = dentry->d_parent->d_fsdata;
+	struct union_entry ue;
+	struct dentry *upperdir = pue->upperpath.dentry;
+	struct dentry *upperdentry = NULL;
+	struct dentry *lowerdir = pue->lowerpath.dentry;
+	struct dentry *lowerdentry = NULL;
+	int err;
+
+	memset(&ue, 0, sizeof(ue));
+
+	if (upperdir) {
+		upperdentry = union_lookup_real(upperdir, &dentry->d_name);
+		err = PTR_ERR(upperdentry);
+		if (IS_ERR(upperdentry))
+			goto out;
+
+		if (upperdentry) {
+			if (union_is_opaquedir(upperdentry))
+				ue.opaque = true;
+			else if (union_is_whiteout(upperdentry)) {
+				dput(upperdentry);
+				upperdentry = NULL;
+				ue.opaque = true;
+			}
+		}
+	}
+	if (lowerdir && !ue.opaque) {
+		lowerdentry = union_lookup_real(lowerdir, &dentry->d_name);
+		if (IS_ERR(lowerdentry)) {
+			err = PTR_ERR(lowerdentry);
+			dput(upperdentry);
+			goto out;
+		}
+	}
+
+	if (lowerdentry && upperdentry &&
+	    (!S_ISDIR(upperdentry->d_inode->i_mode) ||
+	     !S_ISDIR(lowerdentry->d_inode->i_mode))) {
+		dput(lowerdentry);
+		lowerdentry = NULL;
+		ue.opaque = true;
+	}
+
+	if (upperdentry) {
+		ue.upperpath.mnt = pue->upperpath.mnt;
+		ue.upperpath.dentry = upperdentry;
+		path_get(&ue.upperpath);
+		dput(upperdentry);
+	}
+	if (lowerdentry) {
+		ue.lowerpath.mnt = pue->lowerpath.mnt;
+		ue.lowerpath.dentry = lowerdentry;
+		path_get(&ue.lowerpath);
+		dput(lowerdentry);
+	}
+
+	return ue;
+
+out:
+	ue.upperpath.dentry = ERR_PTR(err);
+	return ue;
+}
+
+static int union_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	/* We need to revalidate this dentry if the upper parent might have
+	 * changed.
+	 */
+	struct dentry *parent = dget_parent(dentry);
+	struct union_entry *ue = dentry->d_fsdata;
+	struct union_entry *pue = parent->d_fsdata;
+	struct union_entry nue;
+	int rv = 1;
+
+	if (union_still_valid(dentry, pue->upperpath.dentry,  ue->upperpath.dentry) &&
+	    union_still_valid(dentry, pue->lowerpath.dentry,  ue->lowerpath.dentry))
+		goto out;
+
+	/* If dentry is not a directory, it is safe to fail */
+	rv = 0;
+	if (dentry->d_inode == NULL || !S_ISDIR(dentry->d_inode->i_mode))
+		goto out;
+
+	/* If this dentry or a child is in use by someone else, it cannot be
+	 * invalidated so the best thing to do is re-lookup the names
+	 */
+	mutex_lock(&parent->d_inode->i_mutex);
+	if (dentry->d_parent != parent)
+		goto out_unlock;
+	rv = 1;
+	nue = do_union_lookup(parent->d_inode, dentry, nd);
+	if (IS_ERR(nue.upperpath.dentry))
+		/* Leave the dentry unchanged, but return the error */
+		rv = PTR_ERR(nue.upperpath.dentry);
+	else {
+		path_put(&ue->upperpath);
+		path_put(&ue->lowerpath);
+		*ue = nue;
+	}
+out_unlock:
+	mutex_unlock(&parent->d_inode->i_mutex);
+out:
+	dput(parent);
+	return rv;
+}
+
 static const struct dentry_operations union_dentry_operations = {
 	.d_release = union_dentry_release,
 	.d_iput = union_dentry_iput,
+	.d_revalidate = union_dentry_revalidate,
 };
 
 static struct inode *union_new_inode(struct super_block *sb, umode_t mode)
@@ -459,12 +610,7 @@ static struct dentry *union_lookup_real(struct dentry *dir, struct qstr *name)
 static struct dentry *union_lookup(struct inode *dir, struct dentry *dentry,
 				   struct nameidata *nd)
 {
-	struct union_entry *pue = dentry->d_parent->d_fsdata;
 	struct union_entry *ue;
-	struct dentry *upperdir = pue->upperpath.dentry;
-	struct dentry *upperdentry = NULL;
-	struct dentry *lowerdir = pue->lowerpath.dentry;
-	struct dentry *lowerdentry = NULL;
 	struct inode *inode = NULL;
 	int err;
 
@@ -472,71 +618,29 @@ static struct dentry *union_lookup(struct inode *dir, struct dentry *dentry,
 	ue = kzalloc(sizeof(struct union_entry), GFP_KERNEL);
 	if (!ue)
 		goto out;
-
-	if (upperdir) {
-		upperdentry = union_lookup_real(upperdir, &dentry->d_name);
-		err = PTR_ERR(upperdentry);
-		if (IS_ERR(upperdentry))
-			goto out_free;
-
-		if (upperdentry) {
-			if (union_is_opaquedir(upperdentry))
-				ue->opaque = true;
-			else if (union_is_whiteout(upperdentry)) {
-				dput(upperdentry);
-				upperdentry = NULL;
-				ue->opaque = true;
-			}
-		}
-	}
-	if (lowerdir && !ue->opaque) {
-		lowerdentry = union_lookup_real(lowerdir, &dentry->d_name);
-		if (IS_ERR(lowerdentry)) {
-			err = PTR_ERR(lowerdentry);
-			dput(upperdentry);
-			goto out_free;
-		}
+	*ue = do_union_lookup(dir, dentry, nd);
+	if (IS_ERR(ue->upperpath.dentry)) {
+		err = PTR_ERR(ue->upperpath.dentry);
+		goto out_free;
 	}
-
-	if (lowerdentry && upperdentry &&
-	    (!S_ISDIR(upperdentry->d_inode->i_mode) ||
-	     !S_ISDIR(lowerdentry->d_inode->i_mode))) {
-		dput(lowerdentry);
-		lowerdentry = NULL;
-		ue->opaque = true;
-	}
-
-	if (lowerdentry || upperdentry) {
+	if (ue->upperpath.dentry || ue->lowerpath.dentry) {
 		struct dentry *realdentry;
 
-		realdentry = upperdentry ? upperdentry : lowerdentry;
+		realdentry = ue->upperpath.dentry ? : ue->lowerpath.dentry;
 		inode = union_new_inode(dir->i_sb, realdentry->d_inode->i_mode);
-		if (!inode)
-			goto out_dput;
-	}
-
-	if (upperdentry) {
-		ue->upperpath.mnt = pue->upperpath.mnt;
-		ue->upperpath.dentry = upperdentry;
-		path_get(&ue->upperpath);
-		dput(upperdentry);
-	}
-	if (lowerdentry) {
-		ue->lowerpath.mnt = pue->lowerpath.mnt;
-		ue->lowerpath.dentry = lowerdentry;
-		path_get(&ue->lowerpath);
-		dput(lowerdentry);
+		err = -ENOMEM;
+		if (!inode) {
+			path_put(&ue->upperpath);
+			path_put(&ue->lowerpath);
+			goto out_free;
+		}
 	}
-
 	d_add(dentry, inode);
 	dentry->d_fsdata = ue;
 	dentry->d_op = &union_dentry_operations;
 
 	return NULL;
 
-out_dput:
-	dput(upperdentry);
-	dput(lowerdentry);
 out_free:
 	kfree(ue);
 out:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/