lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Mon, 18 May 2015 16:54:01 +1200
From:	Mark Tomlinson <mark.tomlinson@...iedtelesis.co.nz>
To:	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:	Mark Tomlinson <mark.tomlinson@...iedtelesis.co.nz>
Subject: [PATCH 1/1] JFFS2: Less locking when reading directory entries

At startup, reading a directory (for example to do an ls), requires finding
information on every file. To support JFFS2 recovery from partially written
blocks, the JFFS2 driver must scan all data blocks to verify the checksums
are correct just to be able to correctly report the length of a file. This
will take some time, and will be dependent on the amount of data on the
filesystem.

What makes this worse is that any path lookup will lock the dentry cache
to add the new entry. The JFFS2 driver then spends time finding the file
information (reading the entire file), before it returns the new dentry
information allowing the cache to be unlocked. During this time, no other
files in the same directory can be opened or even tested for existence.

However, there is no need for the dentry cache to be locked for the scan of
the file. The JFFS2 driver already locks the file, so the file will not be
deleted or modified. It also ensures that if another process tries to scan
the same file, the second process will be blocked and the scan only proceed
once.

To make the scan occur without locking the cache, a new vfs call has been
added which allows a filesystem to scan the file, but not return anything.
When the lookup occurs after this, the JFFS2 driver will find this
information and can quickly return the filled-in dentry.

Signed-off-by: Mark Tomlinson <mark.tomlinson@...iedtelesis.co.nz>
---
 fs/jffs2/dir.c     | 41 +++++++++++++++++++++++++++++------
 fs/namei.c         | 63 +++++++++++++++++++++++++++++++++++++++++++-----------
 include/linux/fs.h |  1 +
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97..69c0ec4 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *);
 static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
 			 struct inode *, struct dentry *);
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name);
 
 const struct file_operations jffs2_dir_operations =
 {
@@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations =
 {
 	.create =	jffs2_create,
 	.lookup =	jffs2_lookup,
+	.prescan =	jffs2_prescan,
 	.link =		jffs2_link,
 	.unlink =	jffs2_unlink,
 	.symlink =	jffs2_symlink,
@@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations =
    and we use the same hash function as the dentries. Makes this
    nice and simple
 */
-static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
-				   unsigned int flags)
+/* The prescan function does not have a dentry to fill in, so create this common function
+ * which is just passed the name and the inode for the directory.
+ * This function is very similar to the original jffs2_lookup, except for the arguments
+ * and the fact that the dentry (now not passed) is not updated.
+ */
+static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr *d_name)
 {
 	struct jffs2_inode_info *dir_f;
 	struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 
 	jffs2_dbg(1, "jffs2_lookup()\n");
 
-	if (target->d_name.len > JFFS2_MAX_NAME_LEN)
+	if (d_name->len > JFFS2_MAX_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
 	dir_f = JFFS2_INODE_INFO(dir_i);
@@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 	mutex_lock(&dir_f->sem);
 
 	/* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */
-	for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) {
-		if (fd_list->nhash == target->d_name.hash &&
+	for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; fd_list = fd_list->next) {
+		if (fd_list->nhash == d_name->hash &&
 		    (!fd || fd_list->version > fd->version) &&
-		    strlen(fd_list->name) == target->d_name.len &&
-		    !strncmp(fd_list->name, target->d_name.name, target->d_name.len)) {
+		    strlen(fd_list->name) == d_name->len &&
+		    !strncmp(fd_list->name, d_name->name, d_name->len)) {
 			fd = fd_list;
 		}
 	}
@@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 		if (IS_ERR(inode))
 			pr_warn("iget() failed for ino #%u\n", ino);
 	}
+	return inode;
+}
+
+/* Fill in an inode, and store the information in cache. This allows a
+ * subsequent jffs2_lookup() call to proceed quickly, which is useful
+ * since the jffs2_lookup() call will have the directory entry cache
+ * locked.
+ */
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name)
+{
+	(void)jffs2_lookup_common(dir_i, d_name);
+}
+
+/* jffs2_lookup function has the same functionality as before,
+ * just refactored */
+static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
+				   unsigned int flags)
+{
+	struct inode *inode;
+
+	inode = jffs2_lookup_common(dir_i, &target->d_name);
 
 	return d_splice_alias(inode, target);
 }
diff --git a/fs/namei.c b/fs/namei.c
index fe30d3b..36a0d84 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1322,13 +1322,12 @@ static void follow_dotdot(struct nameidata *nd)
  *
  * dir->d_inode->i_mutex must be held
  */
-static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
-				    unsigned int flags, bool *need_lookup)
+static struct dentry *lookup_dcache_no_alloc(struct qstr *name, struct dentry *dir,
+                                             unsigned int flags)
 {
 	struct dentry *dentry;
 	int error;
 
-	*need_lookup = false;
 	dentry = d_lookup(dir, name);
 	if (dentry) {
 		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
@@ -1345,6 +1344,16 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 			}
 		}
 	}
+	return dentry;
+}
+
+static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
+				    unsigned int flags, bool *need_lookup)
+{
+	struct dentry *dentry;
+
+	*need_lookup = false;
+	dentry = lookup_dcache_no_alloc(name, dir, flags);
 
 	if (!dentry) {
 		dentry = d_alloc(dir, name);
@@ -1394,6 +1403,37 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	return lookup_real(base->d_inode, dentry, flags);
 }
 
+static struct dentry *__lookup_hash_lock(struct nameidata *nd, unsigned int subclass)
+{
+	struct dentry *parent = nd->path.dentry;
+	struct inode *d_inode = parent->d_inode;
+	struct dentry *dentry;
+
+	if (d_inode->i_op->prescan) {
+		/* First, just try to find the dentry in the cache.
+		 * If it is not present, don't do anything.
+		 */
+		mutex_lock_nested(&d_inode->i_mutex, subclass);
+		dentry = lookup_dcache_no_alloc(&nd->last, parent, nd->flags);
+		if (likely(dentry)) {
+			return dentry;
+		}
+		mutex_unlock(&d_inode->i_mutex);
+
+		/* Not in cache. Warn filesystem layer that a lookup is coming.
+		 * This can be done without the directory's mutex held, since
+		 * no dentry is being filled in here.
+		 */
+		d_inode->i_op->prescan(d_inode, &nd->last);
+	}
+	/* Actually perform the lookup via the filesystem. The prescan
+	 * done above will hopefully ensure this is now a quick operation,
+	 * so the directory's mutex is not held for a long time.
+	 */
+	mutex_lock_nested(&d_inode->i_mutex, subclass);
+	return  __lookup_hash(&nd->last, parent, nd->flags);
+}
+
 /*
  *  It's more convoluted than I'd like it to be, but... it's still fairly
  *  small and for now I'd prefer to have fast path as straight as possible.
@@ -1505,9 +1545,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
 	parent = nd->path.dentry;
 	BUG_ON(nd->inode != parent->d_inode);
 
-	mutex_lock(&parent->d_inode->i_mutex);
-	dentry = __lookup_hash(&nd->last, parent, nd->flags);
+	dentry = __lookup_hash_lock(nd, I_MUTEX_NORMAL);
 	mutex_unlock(&parent->d_inode->i_mutex);
+
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 	path->mnt = nd->path.mnt;
@@ -2068,8 +2108,8 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
 		d = ERR_PTR(-EINVAL);
 		goto out;
 	}
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	d = __lookup_hash(&nd.last, nd.path.dentry, 0);
+	nd.flags = 0;
+	d = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
 	if (IS_ERR(d)) {
 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 		path_put(&nd.path);
@@ -3355,8 +3395,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
 	/*
 	 * Do the final lookup.
 	 */
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_hash(&nd);
+	dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
 	if (IS_ERR(dentry))
 		goto unlock;
 
@@ -3669,8 +3708,7 @@ retry:
 	if (error)
 		goto exit1;
 
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_hash(&nd);
+	dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto exit2;
@@ -3789,8 +3827,7 @@ retry:
 	if (error)
 		goto exit1;
 retry_deleg:
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_hash(&nd);
+	dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		/* Why not before? Because we want correct error value */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e..8d68867 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1639,6 +1639,7 @@ struct inode_operations {
 			   umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
+	void (*prescan) (struct inode *dir, struct qstr *name);
 
 	/* WARNING: probably going away soon, do not use! */
 	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ