linux-kernel - [PATCH 34/48] [GFS2] Fix a page lock / glock deadlock

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 17 Apr 2008 09:39:10 +0100
From:	swhiteho@...hat.com
To:	linux-kernel@...r.kernel.org, cluster-devel@...hat.com
Cc:	Steven Whitehouse <swhiteho@...hat.com>
Subject: [PATCH 34/48] [GFS2] Fix a page lock / glock deadlock

From: Steven Whitehouse <swhiteho@...hat.com>

We've previously been using a "try lock" in readpage on the basis that
it would prevent deadlocks due to the inverted lock ordering (our normal
lock ordering is glock first and then page lock). Unfortunately tests
have shown that this isn't enough. If the glock has a demote request
queued such that run_queue() in the glock code tries to do a demote when
it's called under readpage, then it will try to write out all the dirty
pages, which requires locking them. This then deadlocks with the page
locked by readpage.

The solution is to always require two calls into readpage. The first
unlocks the page, gets the glock and returns AOP_TRUNCATED_PAGE; the
second does the actual readpage and unlocks the glock & page as
required.

Signed-off-by: Steven Whitehouse <swhiteho@...hat.com>

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index ace5770..cdad3e6 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -32,24 +32,23 @@
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
 
-static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
+static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh;
-	int locked = 0;
 	struct pid *pid;
 
 	/* Look in glock's list of holders for one with current task as owner */
 	spin_lock(&gl->gl_spin);
 	pid = task_pid(current);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (gh->gh_owner_pid == pid) {
-			locked = 1;
-			break;
-		}
+		if (gh->gh_owner_pid == pid)
+			goto out;
 	}
+	gh = NULL;
+out:
 	spin_unlock(&gl->gl_spin);
 
-	return locked;
+	return gh;
 }
 
 static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5f50dd5..810ff02 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -493,7 +493,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 		return dir;
 	}
 
-	if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
 		if (error)
 			return ERR_PTR(error);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 7523999..fbb4a6a 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -508,23 +508,26 @@ static int __gfs2_readpage(void *file, struct page *page)
 static int gfs2_readpage(struct file *file, struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-	struct gfs2_holder gh;
+	struct gfs2_holder *gh;
 	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
-	error = gfs2_glock_nq_atime(&gh);
-	if (unlikely(error)) {
+	gh = gfs2_glock_is_locked_by_me(ip->i_gl);
+	if (!gh) {
+		gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS);
+		if (!gh)
+			return -ENOBUFS;
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh);
 		unlock_page(page);
-		goto out;
+		error = gfs2_glock_nq_atime(gh);
+		if (likely(error != 0))
+			goto out;
+		return AOP_TRUNCATED_PAGE;
 	}
 	error = __gfs2_readpage(file, page);
-	gfs2_glock_dq(&gh);
+	gfs2_glock_dq(gh);
 out:
-	gfs2_holder_uninit(&gh);
-	if (error == GLR_TRYFAILED) {
-		yield();
-		return AOP_TRUNCATED_PAGE;
-	}
+	gfs2_holder_uninit(gh);
+	kfree(gh);
 	return error;
 }
 
@@ -826,7 +829,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	unsigned int to = from + len;
 	int ret;
 
-	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0);
+	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(ret)) {
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 793e334..4a5e676 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct gfs2_holder d_gh;
 	struct gfs2_inode *ip = NULL;
 	int error;
-	int had_lock=0;
+	int had_lock = 0;
 
 	if (inode) {
 		if (is_bad_inode(inode))
@@ -54,7 +54,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	if (sdp->sd_args.ar_localcaching)
 		goto valid;
 
-	had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
+	had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
 	if (!had_lock) {
 		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
 		if (error)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 301c945..af7097a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -898,7 +898,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	int error;
 	int unlock = 0;
 
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 		if (error)
 			return error;
@@ -1065,7 +1065,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	int error;
 	int unlock = 0;
 
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
 		if (error)
 			return error;
-- 
1.5.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/