lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 19 May 2011 09:47:13 +0100
From:	Steven Whitehouse <swhiteho@...hat.com>
To:	linux-kernel@...r.kernel.org, cluster-devel@...hat.com
Cc:	Steven Whitehouse <swhiteho@...hat.com>
Subject: [PATCH 18/32] GFS2: Double check link count under glock

To avoid any possible races relating to the link count, we need to
recheck it under the inode's glock in all cases where it matters.
Also to ensure we never get any nasty surprises, this patch also
ensures that once the link count has hit zero it can never be
elevated by rereading in data from disk.

The only place we cannot provide a proper solution is in rename
in the case where we are removing a target inode and we discover
that the target inode has been already unlinked on another node.
The race window is very small, and we return EAGAIN in this case
to indicate what has happened. The proper solution would be to move
the lookup parts of rename from the vfs into library calls which
the fs could call directly, but that is potentially a very big job
and this fix should cover most cases for now.

Signed-off-by: Steven Whitehouse <swhiteho@...hat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 94c3a7d..5a02606 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -248,6 +248,32 @@ fail_iput:
 	goto fail;
 }
 
+/**
+ * gfs2_set_nlink - Set the inode's link count based on on-disk info
+ * @inode: The inode in question
+ * @nlink: The link count
+ *
+ * If the link count has hit zero, it must never be raised, whatever the
+ * on-disk inode might say. When new struct inodes are created the link
+ * count is set to 1, so that we can safely use this test even when reading
+ * in on disk information for the first time.
+ */
+
+static void gfs2_set_nlink(struct inode *inode, u32 nlink)
+{
+	/*
+	 * We will need to review setting the nlink count here in the
+	 * light of the forthcoming ro bind mount work. This is a reminder
+	 * to do that.
+	 */
+	if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
+		if (nlink == 0)
+			clear_nlink(inode);
+		else
+			inode->i_nlink = nlink;
+	}
+}
+
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
 	const struct gfs2_dinode *str = buf;
@@ -269,12 +295,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 
 	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
 	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
-	/*
-	 * We will need to review setting the nlink count here in the
-	 * light of the forthcoming ro bind mount work. This is a reminder
-	 * to do that.
-	 */
-	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
+	gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
 	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
 	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
 	atime.tv_sec = be64_to_cpu(str->di_atime);
@@ -484,7 +505,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 
 	/*  Don't create entries in an unlinked directory  */
 	if (!dip->i_inode.i_nlink)
-		return -EPERM;
+		return -ENOENT;
 
 	error = gfs2_dir_check(&dip->i_inode, name, NULL);
 	switch (error) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 09e436a..1005f9e 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -162,6 +162,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (error)
 		goto out_child;
 
+	error = -ENOENT;
+	if (inode->i_nlink == 0)
+		goto out_gunlock;
+
 	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
 	if (error)
 		goto out_gunlock;
@@ -335,6 +339,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	if (error)
 		goto out_child;
 
+	error = -ENOENT;
+	if (ip->i_inode.i_nlink == 0)
+		goto out_rgrp;
+
 	error = gfs2_glock_nq(ghs + 2); /* rgrp */
 	if (error)
 		goto out_rgrp;
@@ -589,6 +597,10 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 	if (error)
 		goto out_child;
 
+	error = -ENOENT;
+	if (ip->i_inode.i_nlink == 0)
+		goto out_rgrp;
+
 	error = gfs2_glock_nq(ghs + 2); /* rgrp */
 	if (error)
 		goto out_rgrp;
@@ -792,6 +804,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock;
 	}
 
+	error = -ENOENT;
+	if (ip->i_inode.i_nlink == 0)
+		goto out_gunlock;
+
 	/* Check out the old directory */
 
 	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
@@ -805,6 +821,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_gunlock;
 
+		if (nip->i_inode.i_nlink == 0) {
+			error = -EAGAIN;
+			goto out_gunlock;
+		}
+
 		if (S_ISDIR(nip->i_inode.i_mode)) {
 			if (nip->i_entries < 2) {
 				if (gfs2_consist_inode(nip))
@@ -835,7 +856,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 
 		if (odip != ndip) {
 			if (!ndip->i_inode.i_nlink) {
-				error = -EINVAL;
+				error = -ENOENT;
 				goto out_gunlock;
 			}
 			if (ndip->i_entries == (u32)-1) {
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ