lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <E1HFdnm-0006DB-00@dorka.pomaz.szeredi.hu>
Date:	Fri, 09 Feb 2007 23:01:06 +0100
From:	Miklos Szeredi <miklos@...redi.hu>
To:	akpm@...l.org
CC:	"Russ Cox" <rsc@...ch.com>, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] fix quadratic behavior of shrink_dcache_parent()

From: Miklos Szeredi <mszeredi@...e.cz>

The time shrink_dcache_parent() takes, grows quadratically with the
depth of the tree under 'parent'.  This starts to get noticable at
about 10,000.

These kinds of depths don't occur normally, and filesystems which
invoke shrink_dcache_parent() via d_invalidate() seem to have other
depth dependent timings, so it's not even easy to expose this problem.

However with FUSE it's easy to create a deep tree and d_invalidate()
will also get called.  This can make a syscall hang for a very long
time.

This is the original discovery of the problem by Russ Cox:

  http://article.gmane.org/gmane.comp.file-systems.fuse.devel/3826

The following patch fixes the quadratic behavior, by optionally
allowing prune_dcache() to prune ancestors of a dentry in one go,
instead of doing it one at a time.

Common code in dput() and prune_one_dentry() is extracted into a new
helper function d_kill().

shrink_dcache_parent() as well as shrink_dcache_sb() are converted to
use the ancestry-pruner option.  Only for shrink_dcache_memory() is
this behavior not desirable, so it keeps using the old algorithm.

Signed-off-by: Miklos Szeredi <mszeredi@...e.cz>

---
Index: linux/fs/dcache.c
===================================================================
--- linux.orig/fs/dcache.c	2007-02-09 22:11:02.000000000 +0100
+++ linux/fs/dcache.c	2007-02-09 22:51:02.000000000 +0100
@@ -121,6 +121,28 @@ static void dentry_iput(struct dentry * 
 	}
 }
 
+/**
+ * d_kill - kill dentry and return parent
+ * @dentry: dentry to kill
+ *
+ * Called with dcache_lock and d_lock, releases both.  The dentry must
+ * already be unhashed and removed from the LRU.
+ *
+ * If this is the root of the dentry tree, return NULL.
+ */
+static struct dentry *d_kill(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	list_del(&dentry->d_u.d_child);
+	dentry_stat.nr_dentry--;	/* For d_free, below */
+	/*drops the locks, at that point nobody can reach this dentry */
+	dentry_iput(dentry);
+	parent = dentry->d_parent;
+	d_free(dentry);
+	return dentry == parent ? NULL : parent;
+}
+
 /* 
  * This is dput
  *
@@ -189,28 +211,17 @@ repeat:
 
 unhash_it:
 	__d_drop(dentry);
-
-kill_it: {
-		struct dentry *parent;
-
-		/* If dentry was on d_lru list
-		 * delete it from there
-		 */
-  		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
-  			dentry_stat.nr_unused--;
-  		}
-  		list_del(&dentry->d_u.d_child);
-		dentry_stat.nr_dentry--;	/* For d_free, below */
-		/*drops the locks, at that point nobody can reach this dentry */
-		dentry_iput(dentry);
-		parent = dentry->d_parent;
-		d_free(dentry);
-		if (dentry == parent)
-			return;
-		dentry = parent;
-		goto repeat;
+kill_it:
+	/* If dentry was on d_lru list
+	 * delete it from there
+	 */
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry_stat.nr_unused--;
 	}
+	dentry = d_kill(dentry);
+	if (dentry)
+		goto repeat;
 }
 
 /**
@@ -371,22 +382,41 @@ restart:
  * Throw away a dentry - free the inode, dput the parent.  This requires that
  * the LRU list has already been removed.
  *
+ * If prune_parents is true, try to prune ancestors as well.
+ *
  * Called with dcache_lock, drops it and then regains.
  * Called with dentry->d_lock held, drops it.
  */
-static void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry, int prune_parents)
 {
-	struct dentry * parent;
-
 	__d_drop(dentry);
-	list_del(&dentry->d_u.d_child);
-	dentry_stat.nr_dentry--;	/* For d_free, below */
-	dentry_iput(dentry);
-	parent = dentry->d_parent;
-	d_free(dentry);
-	if (parent != dentry)
-		dput(parent);
+	dentry = d_kill(dentry);
+	if (!prune_parents) {
+		if (dentry)
+			dput(dentry);
+		spin_lock(&dcache_lock);
+		return;
+	}
+
+	/*
+	 * Prune ancestors.  Locking is simpler than in dput(),
+	 * because dcache_lock needs to be taken anyway.
+	 */
 	spin_lock(&dcache_lock);
+	while (dentry) {
+		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+			return;
+
+		if (dentry->d_op && dentry->d_op->d_delete)
+			dentry->d_op->d_delete(dentry);
+		if (!list_empty(&dentry->d_lru)) {
+			list_del(&dentry->d_lru);
+			dentry_stat.nr_unused--;
+		}
+		__d_drop(dentry);
+		dentry = d_kill(dentry);
+		spin_lock(&dcache_lock);
+	}
 }
 
 /**
@@ -394,6 +424,7 @@ static void prune_one_dentry(struct dent
  * @count: number of entries to try and free
  * @sb: if given, ignore dentries for other superblocks
  *         which are being unmounted.
+ * @prune_parents: if true, try to prune ancestors as well in one go
  *
  * Shrink the dcache. This is done when we need
  * more memory, or simply when we need to unmount
@@ -404,7 +435,7 @@ static void prune_one_dentry(struct dent
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count, struct super_block *sb, int prune_parents)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -464,7 +495,7 @@ static void prune_dcache(int count, stru
 		 * without taking the s_umount lock (I already hold it).
 		 */
 		if (sb && dentry->d_sb == sb) {
-			prune_one_dentry(dentry);
+			prune_one_dentry(dentry, prune_parents);
 			continue;
 		}
 		/*
@@ -479,7 +510,7 @@ static void prune_dcache(int count, stru
 		s_umount = &dentry->d_sb->s_umount;
 		if (down_read_trylock(s_umount)) {
 			if (dentry->d_sb->s_root != NULL) {
-				prune_one_dentry(dentry);
+				prune_one_dentry(dentry, prune_parents);
 				up_read(s_umount);
 				continue;
 			}
@@ -550,7 +581,7 @@ repeat:
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		prune_one_dentry(dentry);
+		prune_one_dentry(dentry, 1);
 		cond_resched_lock(&dcache_lock);
 		goto repeat;
 	}
@@ -829,7 +860,7 @@ void shrink_dcache_parent(struct dentry 
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found, parent->d_sb);
+		prune_dcache(found, parent->d_sb, 1);
 }
 
 /*
@@ -849,7 +880,7 @@ static int shrink_dcache_memory(int nr, 
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr, NULL);
+		prune_dcache(nr, NULL, 0);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ