lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1322851407-17182-4-git-send-email-andi@firstfloor.org>
Date:	Fri,  2 Dec 2011 10:43:27 -0800
From:	Andi Kleen <andi@...stfloor.org>
To:	greg@...ah.com
Cc:	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	fengguang.wu@...el.com, Andi Kleen <ak@...ux.intel.com>
Subject: [PATCH 3/3] VFS: Add event counting to dcache

From: Andi Kleen <ak@...ux.intel.com>

Most self-respecting subsystems -- like networking or MM -- have their
own counter infrastructure these days. This is useful for understanding
the behaviour of a running system. Counters have low enough overhead
that they can always be enabled.

This patch adds event counts to the dcache.

Instead of developing a separate counter infrastructure for the VFS,
I'm using the generic counters implemented in debugfs.

Since we recently had problems with it, I instrumented the dcache
RCU code. This is rather tricky code that is difficult to tune,
and some indication of why aborts happen is quite useful.

I'm especially interested in feedback on the placement of the event counters.

Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 fs/namei.c |   42 ++++++++++++++++++++++++++++++++++++------
 1 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 5008f01..bfbe36a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -33,10 +33,22 @@
 #include <linux/device_cgroup.h>
 #include <linux/fs_struct.h>
 #include <linux/posix_acl.h>
+#include <linux/debugfs.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
 
+const char dname[] = "vfs/dcache";
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_root_changed_abort, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_dir_changed_abort, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_entry_changed_abort, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_permission_abort, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_revalidate_abort, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_ref_walks, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_rcu_walks, dname);
+DEFINE_DEBUGFS_COUNTER(dcache_reval_walks, dname);
+
+
 /* [Feb-1997 T. Schoebel-Theuer]
  * Fundamental changes in the pathname lookup mechanisms (namei)
  * were necessary because of omirr.  The reason is that omirr needs
@@ -430,20 +442,26 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 		want_root = 1;
 		spin_lock(&fs->lock);
 		if (nd->root.mnt != fs->root.mnt ||
-				nd->root.dentry != fs->root.dentry)
+		    nd->root.dentry != fs->root.dentry) {
+			debugfs_counter_inc(dcache_rcu_root_changed_abort);
 			goto err_root;
+		}
 	}
 	spin_lock(&parent->d_lock);
 	if (!dentry) {
-		if (!__d_rcu_to_refcount(parent, nd->seq))
+		if (!__d_rcu_to_refcount(parent, nd->seq)) {
+			debugfs_counter_inc(dcache_rcu_dir_changed_abort);
 			goto err_parent;
+		}
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
 		if (dentry->d_parent != parent)
 			goto err_parent;
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!__d_rcu_to_refcount(dentry, nd->seq))
+		if (!__d_rcu_to_refcount(dentry, nd->seq)) {
+			debugfs_counter_inc(dcache_rcu_entry_changed_abort);
 			goto err_child;
+		}
 		/*
 		 * If the sequence check on the child dentry passed, then
 		 * the child has not been removed from its parent. This
@@ -474,6 +492,7 @@ err_parent:
 err_root:
 	if (want_root)
 		spin_unlock(&fs->lock);
+	debugfs_counter_inc(dcache_rcu_root_changed_abort);
 	return -ECHILD;
 }
 
@@ -522,6 +541,7 @@ static int complete_walk(struct nameidata *nd)
 			spin_unlock(&dentry->d_lock);
 			rcu_read_unlock();
 			br_read_unlock(vfsmount_lock);
+			debugfs_counter_inc(dcache_rcu_entry_changed_abort);
 			return -ECHILD;
 		}
 		BUG_ON(nd->inode != dentry->d_inode);
@@ -960,6 +980,7 @@ failed:
 		nd->root.mnt = NULL;
 	rcu_read_unlock();
 	br_read_unlock(vfsmount_lock);
+	debugfs_counter_inc(dcache_rcu_entry_changed_abort);
 	return -ECHILD;
 }
 
@@ -1132,8 +1153,10 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			goto unlazy;
 
 		/* Memory barrier in read_seqcount_begin of child is enough */
-		if (__read_seqcount_retry(&parent->d_seq, nd->seq))
+		if (__read_seqcount_retry(&parent->d_seq, nd->seq)) {
+			debugfs_counter_inc(dcache_rcu_dir_changed_abort);
 			return -ECHILD;
+		}
 		nd->seq = seq;
 
 		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
@@ -1141,6 +1164,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			if (unlikely(status <= 0)) {
 				if (status != -ECHILD)
 					need_reval = 0;
+				debugfs_counter_inc(dcache_rcu_revalidate_abort);
 				goto unlazy;
 			}
 		}
@@ -1226,6 +1250,7 @@ static inline int may_lookup(struct nameidata *nd)
 		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
 		if (err != -ECHILD)
 			return err;
+		debugfs_counter_inc(dcache_rcu_permission_abort);
 		if (unlazy_walk(nd, NULL))
 			return -ECHILD;
 	}
@@ -1643,10 +1668,15 @@ static int do_path_lookup(int dfd, const char *name,
 				unsigned int flags, struct nameidata *nd)
 {
 	int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
-	if (unlikely(retval == -ECHILD))
+	debugfs_counter_inc(dcache_rcu_walks);
+	if (unlikely(retval == -ECHILD)) {
+		debugfs_counter_inc(dcache_ref_walks);
 		retval = path_lookupat(dfd, name, flags, nd);
-	if (unlikely(retval == -ESTALE))
+	}
+	if (unlikely(retval == -ESTALE)) {
+		debugfs_counter_inc(dcache_reval_walks);
 		retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+	}
 
 	if (likely(!retval)) {
 		if (unlikely(!audit_dummy_context())) {
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ