lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2303960.1585684042@warthog.procyon.org.uk>
Date:   Tue, 31 Mar 2020 20:47:22 +0100
From:   David Howells <dhowells@...hat.com>
To:     Miklos Szeredi <miklos@...redi.hu>
Cc:     dhowells@...hat.com,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Al Viro <viro@...iv.linux.org.uk>, dray@...hat.com,
        Karel Zak <kzak@...hat.com>,
        Miklos Szeredi <mszeredi@...hat.com>,
        Steven Whitehouse <swhiteho@...hat.com>,
        Jeff Layton <jlayton@...hat.com>, Ian Kent <raven@...maw.net>,
        andres@...razel.de,
        Christian Brauner <christian.brauner@...ntu.com>,
        Lennart Poettering <mzxreary@...inter.de>,
        keyrings@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: Upcoming: Notifications, FS notifications and fsinfo()

Miklos Szeredi <miklos@...redi.hu> wrote:

> Cool, thanks for testing.  Unfortunately the test-fsinfo-perf.c file
> didn't make it into the patch.   Can you please refresh and resend?

Oops - I forgot to add it.  See attached.

David
---
commit b7239021cb7660bf328bb7fcce05e3a35ce5842b
Author: David Howells <dhowells@...hat.com>
Date:   Tue Mar 31 14:39:07 2020 +0100

    Performance test Miklós's patch vs fsinfo

diff --git a/fs/Makefile b/fs/Makefile
index b6bf2424c7f7..ac0627176db1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
+obj-y				+= mountfs/
diff --git a/fs/mount.h b/fs/mount.h
index 063f41bc2e93..89b091fc482f 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -82,6 +82,7 @@ struct mount {
 	atomic_t mnt_subtree_notifications;	/* Number of notifications in subtree */
 	struct watch_list *mnt_watchers; /* Watches on dentries within this mount */
 #endif
+	struct mountfs_entry *mnt_mountfs_entry;
 } __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -177,3 +178,11 @@ static inline void notify_mount(struct mount *triggered,
 {
 }
 #endif
+
+void mnt_namespace_lock_read(void);
+void mnt_namespace_unlock_read(void);
+
+void mountfs_create(struct mount *mnt);
+extern void mountfs_remove(struct mount *mnt);
+int mountfs_lookup_internal(struct vfsmount *m, struct path *path);
+
diff --git a/fs/mountfs/Makefile b/fs/mountfs/Makefile
new file mode 100644
index 000000000000..35a65e9a966f
--- /dev/null
+++ b/fs/mountfs/Makefile
@@ -0,0 +1 @@
+obj-y				+= super.o
diff --git a/fs/mountfs/super.c b/fs/mountfs/super.c
new file mode 100644
index 000000000000..82c01eb6154d
--- /dev/null
+++ b/fs/mountfs/super.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../pnode.h"
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <linux/fs_struct.h>
+#include <linux/fs_context.h>
+
+#define MOUNTFS_SUPER_MAGIC 0x4e756f4d
+
+static DEFINE_SPINLOCK(mountfs_lock);
+static struct rb_root mountfs_entries = RB_ROOT;
+static struct vfsmount *mountfs_mnt __read_mostly;
+
+struct mountfs_entry {
+	struct kref kref;
+	struct mount *mnt;
+	struct rb_node node;
+	int id;
+};
+
+static const char *mountfs_attrs[] = {
+	"root", "mountpoint", "id", "parent", "options", "children",
+	"group", "master", "propagate_from"
+};
+
+#define MOUNTFS_INO(id) (((unsigned long) id + 1) * \
+			 (ARRAY_SIZE(mountfs_attrs) + 1))
+
+void mountfs_entry_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct mountfs_entry, kref));
+}
+
+void mountfs_entry_put(struct mountfs_entry *entry)
+{
+	kref_put(&entry->kref, mountfs_entry_release);
+}
+
+static bool mountfs_entry_visible(struct mountfs_entry *entry)
+{
+	struct mount *mnt;
+	bool visible = false;
+
+	rcu_read_lock();
+	mnt = rcu_dereference(entry->mnt);
+	if (mnt && mnt->mnt_ns == current->nsproxy->mnt_ns)
+		visible = true;
+	rcu_read_unlock();
+
+	return visible;
+}
+static int mountfs_attr_show(struct seq_file *sf, void *v)
+{
+	const char *name = sf->file->f_path.dentry->d_name.name;
+	struct mountfs_entry *entry = sf->private;
+	struct mount *mnt;
+	struct vfsmount *m;
+	struct super_block *sb;
+	struct path root;
+	int tmp, err = -ENODEV;
+
+	mnt_namespace_lock_read();
+
+	mnt = entry->mnt;
+	if (!mnt || !mnt->mnt_ns)
+		goto out;
+
+	err = 0;
+	m = &mnt->mnt;
+	sb = m->mnt_sb;
+
+	if (strcmp(name, "root") == 0) {
+		if (sb->s_op->show_path) {
+			err = sb->s_op->show_path(sf, m->mnt_root);
+		} else {
+			seq_dentry(sf, m->mnt_root, " \t\n\\");
+		}
+		seq_putc(sf, '\n');
+	} else if (strcmp(name, "mountpoint") == 0) {
+		struct path mnt_path = { .dentry = m->mnt_root, .mnt = m };
+
+		get_fs_root(current->fs, &root);
+		err = seq_path_root(sf, &mnt_path, &root, " \t\n\\");
+		if (err == SEQ_SKIP) {
+			seq_puts(sf, "(unreachable)");
+			err = 0;
+		}
+		seq_putc(sf, '\n');
+		path_put(&root);
+	} else if (strcmp(name, "id") == 0) {
+		seq_printf(sf, "%i\n", mnt->mnt_id);
+	} else if (strcmp(name, "parent") == 0) {
+		tmp = rcu_dereference(mnt->mnt_parent)->mnt_id;
+		seq_printf(sf, "%i\n", tmp);
+	} else if (strcmp(name, "options") == 0) {
+		int mnt_flags = READ_ONCE(m->mnt_flags);
+
+		seq_puts(sf, mnt_flags & MNT_READONLY ? "ro" : "rw");
+		seq_mnt_opts(sf, mnt_flags);
+		seq_putc(sf, '\n');
+	} else if (strcmp(name, "children") == 0) {
+		struct mount *child;
+		bool first = true;
+
+		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+			if (!first)
+				seq_putc(sf, ',');
+			else
+				first = false;
+			seq_printf(sf, "%i", child->mnt_id);
+		}
+		if (!first)
+			seq_putc(sf, '\n');
+	} else if (strcmp(name, "group") == 0) {
+		if (IS_MNT_SHARED(mnt))
+			seq_printf(sf, "%i\n", mnt->mnt_group_id);
+	} else if (strcmp(name, "master") == 0) {
+		if (IS_MNT_SLAVE(mnt)) {
+			tmp = rcu_dereference(mnt->mnt_master)->mnt_group_id;
+			seq_printf(sf, "%i\n", tmp);
+		}
+	} else if (strcmp(name, "propagate_from") == 0) {
+		if (IS_MNT_SLAVE(mnt)) {
+			get_fs_root(current->fs, &root);
+			tmp = get_dominating_id(mnt, &root);
+			if (tmp)
+				seq_printf(sf, "%i\n", tmp);
+		}
+	} else {
+		WARN_ON(1);
+		err = -EIO;
+	}
+out:
+	mnt_namespace_unlock_read();
+
+	return err;
+}
+
+static int mountfs_attr_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mountfs_attr_show, inode->i_private);
+}
+
+static const struct file_operations mountfs_attr_fops = {
+	.open		= mountfs_attr_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static struct mountfs_entry *mountfs_node_to_entry(struct rb_node *node)
+{
+	return rb_entry(node, struct mountfs_entry, node);
+}
+
+static struct rb_node **mountfs_find_node(int id, struct rb_node **parent)
+{
+	struct rb_node **link = &mountfs_entries.rb_node;
+
+	*parent = NULL;
+	while (*link) {
+		struct mountfs_entry *entry = mountfs_node_to_entry(*link);
+
+		*parent = *link;
+		if (id < entry->id)
+			link = &entry->node.rb_left;
+		else if (id > entry->id)
+			link = &entry->node.rb_right;
+		else
+			break;
+	}
+	return link;
+}
+
+void mountfs_create(struct mount *mnt)
+{
+	struct mountfs_entry *entry;
+	struct rb_node **link, *parent;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		WARN(1, "failed to allocate mountfs entry");
+		return;
+	}
+	kref_init(&entry->kref);
+	entry->mnt = mnt;
+	entry->id = mnt->mnt_id;
+
+	spin_lock(&mountfs_lock);
+	link = mountfs_find_node(entry->id, &parent);
+	if (!WARN_ON(*link)) {
+		rb_link_node(&entry->node, parent, link);
+		rb_insert_color(&entry->node, &mountfs_entries);
+		mnt->mnt_mountfs_entry = entry;
+	} else {
+		kfree(entry);
+	}
+	spin_unlock(&mountfs_lock);
+}
+
+void mountfs_remove(struct mount *mnt)
+{
+	struct mountfs_entry *entry = mnt->mnt_mountfs_entry;
+
+	if (!entry)
+		return;
+	spin_lock(&mountfs_lock);
+	entry->mnt = NULL;
+	rb_erase(&entry->node, &mountfs_entries);
+	spin_unlock(&mountfs_lock);
+
+	mountfs_entry_put(entry);
+
+	mnt->mnt_mountfs_entry = NULL;
+}
+
+static struct mountfs_entry *mountfs_get_entry(const char *name)
+{
+	struct mountfs_entry *entry = NULL;
+	struct rb_node **link, *dummy;
+	unsigned long mnt_id;
+	char buf[32];
+	int ret;
+
+	ret = kstrtoul(name, 10, &mnt_id);
+	if (ret || mnt_id > INT_MAX)
+		return NULL;
+
+	snprintf(buf, sizeof(buf), "%lu", mnt_id);
+	if (strcmp(buf, name) != 0)
+		return NULL;
+
+	spin_lock(&mountfs_lock);
+	link = mountfs_find_node(mnt_id, &dummy);
+	if (*link) {
+		entry = mountfs_node_to_entry(*link);
+		if (!mountfs_entry_visible(entry))
+			entry = NULL;
+		else
+			kref_get(&entry->kref);
+	}
+	spin_unlock(&mountfs_lock);
+
+	return entry;
+}
+
+static void mountfs_init_inode(struct inode *inode, umode_t mode);
+
+static struct dentry *mountfs_lookup_entry(struct dentry *dentry,
+					   struct mountfs_entry *entry,
+					   int idx)
+{
+	struct inode *inode;
+
+	inode = new_inode(dentry->d_sb);
+	if (!inode) {
+		mountfs_entry_put(entry);
+		return ERR_PTR(-ENOMEM);
+	}
+	inode->i_private = entry;
+	inode->i_ino = MOUNTFS_INO(entry->id) + idx;
+	mountfs_init_inode(inode, idx ? S_IFREG | 0444 : S_IFDIR | 0555);
+	return d_splice_alias(inode, dentry);
+
+}
+
+static struct dentry *mountfs_lookup(struct inode *dir, struct dentry *dentry,
+				     unsigned int flags)
+{
+	struct mountfs_entry *entry = dir->i_private;
+	int i = 0;
+
+	if (entry) {
+		for (i = 0; i < ARRAY_SIZE(mountfs_attrs); i++)
+			if (strcmp(mountfs_attrs[i], dentry->d_name.name) == 0)
+				break;
+		if (i == ARRAY_SIZE(mountfs_attrs))
+			return ERR_PTR(-ENOMEM);
+		i++;
+		kref_get(&entry->kref);
+	} else {
+		entry = mountfs_get_entry(dentry->d_name.name);
+		if (!entry)
+			return ERR_PTR(-ENOENT);
+	}
+
+	return mountfs_lookup_entry(dentry, entry, i);
+}
+
+static int mountfs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct mountfs_entry *entry = dentry->d_inode->i_private;
+
+	/* root: valid */
+	if (!entry)
+		return 1;
+
+	/* removed: invalid */
+	if (!entry->mnt)
+		return 0;
+
+	/* attribute or visible in this namespace: valid */
+	if (!d_can_lookup(dentry) || mountfs_entry_visible(entry))
+		return 1;
+
+	/* invlisible in this namespace: valid but deny entry*/
+	return -ENOENT;
+}
+
+static int mountfs_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct rb_node *node;
+	struct mountfs_entry *entry = file_inode(file)->i_private;
+	char name[32];
+	const char *s;
+	unsigned int len, pos, id;
+
+	if (ctx->pos - 2 > INT_MAX || !dir_emit_dots(file, ctx))
+		return 0;
+
+	if (entry) {
+		while (ctx->pos - 2 < ARRAY_SIZE(mountfs_attrs)) {
+			s = mountfs_attrs[ctx->pos - 2];
+			if (!dir_emit(ctx, s, strlen(s),
+				      MOUNTFS_INO(entry->id) + ctx->pos,
+				      DT_REG))
+				break;
+			ctx->pos++;
+		}
+		return 0;
+	}
+
+	pos = ctx->pos - 2;
+	do {
+		spin_lock(&mountfs_lock);
+		mountfs_find_node(pos, &node);
+		pos = 1U + INT_MAX;
+		do {
+			if (!node) {
+				spin_unlock(&mountfs_lock);
+				goto out;
+			}
+			entry = mountfs_node_to_entry(node);
+			node = rb_next(node);
+		} while (!mountfs_entry_visible(entry));
+		if (node)
+			pos = mountfs_node_to_entry(node)->id;
+		id = entry->id;
+		spin_unlock(&mountfs_lock);
+
+		len = snprintf(name, sizeof(name), "%i", id);
+		ctx->pos = id + 2;
+		if (!dir_emit(ctx, name, len, MOUNTFS_INO(id), DT_DIR))
+			return 0;
+	} while (pos <= INT_MAX);
+out:
+	ctx->pos =  pos + 2;
+	return 0;
+}
+
+int mountfs_lookup_internal(struct vfsmount *m, struct path *path)
+{
+	char name[32];
+	struct qstr this = { .name = name };
+	struct mount *mnt = real_mount(m);
+	struct mountfs_entry *entry = mnt->mnt_mountfs_entry;
+	struct dentry *dentry, *old, *root = mountfs_mnt->mnt_root;
+
+	this.len = snprintf(name, sizeof(name), "%i", mnt->mnt_id);
+	dentry = d_hash_and_lookup(root, &this);
+	if (dentry && dentry->d_inode->i_private != entry) {
+		d_invalidate(dentry);
+		dput(dentry);
+		dentry = NULL;
+	}
+	if (!dentry) {
+		dentry = d_alloc(root, &this);
+		if (!dentry)
+			return -ENOMEM;
+
+		kref_get(&entry->kref);
+		old = mountfs_lookup_entry(dentry, entry, 0);
+		if (old) {
+			dput(dentry);
+			if (IS_ERR(old))
+				return PTR_ERR(old);
+			dentry = old;
+		}
+	}
+
+	*path = (struct path) { .mnt = mountfs_mnt, .dentry = dentry };
+	return 0;
+}
+
+static const struct dentry_operations mountfs_dops = {
+	.d_revalidate = mountfs_d_revalidate,
+};
+
+static const struct inode_operations mountfs_iops = {
+	.lookup = mountfs_lookup,
+};
+
+static const struct file_operations mountfs_fops = {
+	.iterate_shared = mountfs_readdir,
+	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
+};
+
+static void mountfs_init_inode(struct inode *inode, umode_t mode)
+{
+	inode->i_mode = mode;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	if (S_ISREG(mode)) {
+		inode->i_size = PAGE_SIZE;
+		inode->i_fop = &mountfs_attr_fops;
+	} else {
+		inode->i_op = &mountfs_iops;
+		inode->i_fop = &mountfs_fops;
+	}
+}
+
+static void mountfs_evict_inode(struct inode *inode)
+{
+	struct mountfs_entry *entry = inode->i_private;
+
+	clear_inode(inode);
+	if (entry)
+		mountfs_entry_put(entry);
+}
+
+static const struct super_operations mountfs_sops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+	.evict_inode	= mountfs_evict_inode,
+};
+
+static int mountfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct inode *root;
+
+	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = MOUNTFS_SUPER_MAGIC;
+	sb->s_time_gran = 1;
+	sb->s_shrink.seeks = 0;
+	sb->s_op = &mountfs_sops;
+	sb->s_d_op = &mountfs_dops;
+
+	root = new_inode(sb);
+	if (!root)
+		return -ENOMEM;
+
+	root->i_ino = 1;
+	mountfs_init_inode(root, S_IFDIR | 0444);
+
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int mountfs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, mountfs_fill_super);
+}
+
+static const struct fs_context_operations mountfs_context_ops = {
+	.get_tree = mountfs_get_tree,
+};
+
+static int mountfs_init_fs_context(struct fs_context *fc)
+{
+	fc->ops = &mountfs_context_ops;
+	fc->global = true;
+	return 0;
+}
+
+static struct file_system_type mountfs_fs_type = {
+	.name = "mountfs",
+	.init_fs_context = mountfs_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+static int __init mountfs_init(void)
+{
+	int err;
+
+	err = register_filesystem(&mountfs_fs_type);
+	if (!err) {
+		mountfs_mnt = kern_mount(&mountfs_fs_type);
+		if (IS_ERR(mountfs_mnt)) {
+			err = PTR_ERR(mountfs_mnt);
+			unregister_filesystem(&mountfs_fs_type);
+		}
+	}
+	return err;
+}
+fs_initcall(mountfs_init);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5427e732c1bf..a05a2885a90e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -962,6 +962,8 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
 
 	if (fc->sb_flags & SB_KERNMOUNT)
 		mnt->mnt.mnt_flags = MNT_INTERNAL;
+	else
+		mountfs_create(mnt);
 
 	atomic_inc(&fc->root->d_sb->s_active);
 	mnt->mnt.mnt_sb		= fc->root->d_sb;
@@ -1033,7 +1035,7 @@ vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
 }
 EXPORT_SYMBOL_GPL(vfs_submount);
 
-static struct mount *clone_mnt(struct mount *old, struct dentry *root,
+static struct mount *clone_mnt_common(struct mount *old, struct dentry *root,
 					int flag)
 {
 	struct super_block *sb = old->mnt.mnt_sb;
@@ -1100,6 +1102,17 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
+static struct mount *clone_mnt(struct mount *old, struct dentry *root,
+			       int flag)
+{
+	struct mount *mnt = clone_mnt_common(old, root, flag);
+
+	if (!IS_ERR(mnt))
+		mountfs_create(mnt);
+
+	return mnt;
+}
+
 static void cleanup_mnt(struct mount *mnt)
 {
 	struct hlist_node *p;
@@ -1112,6 +1125,7 @@ static void cleanup_mnt(struct mount *mnt)
 	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
+
 	if (unlikely(mnt->mnt_pins.first))
 		mnt_pin_kill(mnt);
 	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
@@ -1197,6 +1211,8 @@ static void mntput_no_expire(struct mount *mnt)
 	unlock_mount_hash();
 	shrink_dentry_list(&list);
 
+	mountfs_remove(mnt);
+
 	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
 		struct task_struct *task = current;
 		if (likely(!(task->flags & PF_KTHREAD))) {
@@ -1263,13 +1279,14 @@ EXPORT_SYMBOL(path_is_mountpoint);
 struct vfsmount *mnt_clone_internal(const struct path *path)
 {
 	struct mount *p;
-	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
+	p = clone_mnt_common(real_mount(path->mnt), path->dentry, CL_PRIVATE);
 	if (IS_ERR(p))
 		return ERR_CAST(p);
 	p->mnt.mnt_flags |= MNT_INTERNAL;
 	return &p->mnt;
 }
 
+
 #ifdef CONFIG_PROC_FS
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
@@ -1411,6 +1428,16 @@ static inline void namespace_lock(void)
 	down_write(&namespace_sem);
 }
 
+void mnt_namespace_lock_read(void)
+{
+	down_read(&namespace_sem);
+}
+
+void mnt_namespace_unlock_read(void)
+{
+	up_read(&namespace_sem);
+}
+
 enum umount_tree_flags {
 	UMOUNT_SYNC = 1,
 	UMOUNT_PROPAGATE = 2,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c7c64272b0fa..0477f8b51182 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3092,6 +3092,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
 	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+	DIR("fdmount",    S_IRUSR|S_IXUSR, proc_fdmount_inode_operations, proc_fdmount_operations),
 	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
 	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
@@ -3497,6 +3498,7 @@ static const struct inode_operations proc_tid_comm_inode_operations = {
 static const struct pid_entry tid_base_stuff[] = {
 	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
 	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+	DIR("fdmount",   S_IRUSR|S_IXUSR, proc_fdmount_inode_operations, proc_fdmount_operations),
 	DIR("ns",	 S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
 	DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 81882a13212d..94a57e178801 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -361,3 +361,85 @@ const struct file_operations proc_fdinfo_operations = {
 	.iterate_shared	= proc_readfdinfo,
 	.llseek		= generic_file_llseek,
 };
+
+static int proc_fdmount_link(struct dentry *dentry, struct path *path)
+{
+	struct files_struct *files = NULL;
+	struct task_struct *task;
+	struct path fd_path;
+	int ret = -ENOENT;
+
+	task = get_proc_task(d_inode(dentry));
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+
+	if (files) {
+		unsigned int fd = proc_fd(d_inode(dentry));
+		struct file *fd_file;
+
+		spin_lock(&files->file_lock);
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			fd_path = fd_file->f_path;
+			path_get(&fd_path);
+			ret = 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	if (!ret) {
+		ret = mountfs_lookup_internal(fd_path.mnt, path);
+		path_put(&fd_path);
+	}
+
+	return ret;
+}
+
+static struct dentry *proc_fdmount_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
+{
+	const struct fd_data *data = ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | 0400);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	ei = PROC_I(inode);
+	ei->fd = data->fd;
+
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+
+	ei->op.proc_get_link = proc_fdmount_link;
+	tid_fd_update_inode(task, inode, 0);
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	return d_splice_alias(inode, dentry);
+}
+
+static struct dentry *
+proc_lookupfdmount(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fdmount_instantiate);
+}
+
+static int proc_readfdmount(struct file *file, struct dir_context *ctx)
+{
+	return proc_readfd_common(file, ctx,
+				  proc_fdmount_instantiate);
+}
+
+const struct inode_operations proc_fdmount_inode_operations = {
+	.lookup		= proc_lookupfdmount,
+	.setattr	= proc_setattr,
+};
+
+const struct file_operations proc_fdmount_operations = {
+	.read		= generic_read_dir,
+	.iterate_shared	= proc_readfdmount,
+	.llseek		= generic_file_llseek,
+};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index f371a602bf58..9e087c833e65 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -10,6 +10,9 @@ extern const struct inode_operations proc_fd_inode_operations;
 extern const struct file_operations proc_fdinfo_operations;
 extern const struct inode_operations proc_fdinfo_inode_operations;
 
+extern const struct file_operations proc_fdmount_operations;
+extern const struct inode_operations proc_fdmount_inode_operations;
+
 extern int proc_fd_permission(struct inode *inode, int mask);
 
 static inline unsigned int proc_fd(struct inode *inode)
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 273ee82d8aa9..e634faa9160e 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -61,24 +61,6 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
 	return security_sb_show_options(m, sb);
 }
 
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
-{
-	static const struct proc_fs_info mnt_info[] = {
-		{ MNT_NOSUID, ",nosuid" },
-		{ MNT_NODEV, ",nodev" },
-		{ MNT_NOEXEC, ",noexec" },
-		{ MNT_NOATIME, ",noatime" },
-		{ MNT_NODIRATIME, ",nodiratime" },
-		{ MNT_RELATIME, ",relatime" },
-		{ 0, NULL }
-	};
-	const struct proc_fs_info *fs_infop;
-
-	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
-		if (mnt->mnt_flags & fs_infop->flag)
-			seq_puts(m, fs_infop->str);
-	}
-}
 
 static inline void mangle(struct seq_file *m, const char *s)
 {
@@ -120,7 +102,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 	err = show_sb_opts(m, sb);
 	if (err)
 		goto out;
-	show_mnt_opts(m, mnt);
+	seq_mnt_opts(m, mnt->mnt_flags);
 	if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt_path.dentry);
 	seq_puts(m, " 0 0\n");
@@ -153,7 +135,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 		goto out;
 
 	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
-	show_mnt_opts(m, mnt);
+	seq_mnt_opts(m, mnt->mnt_flags);
 
 	/* Tagged fields ("foo:X" or "bar") */
 	if (IS_MNT_SHARED(r))
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1600034a929b..9726baba1732 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -15,6 +15,7 @@
 #include <linux/cred.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
+#include <linux/mount.h>
 #include <linux/string_helpers.h>
 
 #include <linux/uaccess.h>
@@ -548,6 +549,28 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
 }
 EXPORT_SYMBOL(seq_dentry);
 
+void seq_mnt_opts(struct seq_file *m, int mnt_flags)
+{
+	unsigned int i;
+	static const struct {
+		int flag;
+		const char *str;
+	} mnt_info[] = {
+		{ MNT_NOSUID, ",nosuid" },
+		{ MNT_NODEV, ",nodev" },
+		{ MNT_NOEXEC, ",noexec" },
+		{ MNT_NOATIME, ",noatime" },
+		{ MNT_NODIRATIME, ",nodiratime" },
+		{ MNT_RELATIME, ",relatime" },
+		{ 0, NULL }
+	};
+
+	for (i = 0; mnt_info[i].flag; i++) {
+		if (mnt_flags & mnt_info[i].flag)
+			seq_puts(m, mnt_info[i].str);
+	}
+}
+
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
 	return NULL + (*pos == 0);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 770c2bf3aa43..9dd7812eb777 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -138,6 +138,7 @@ int seq_file_path(struct seq_file *, struct file *, const char *);
 int seq_dentry(struct seq_file *, struct dentry *, const char *);
 int seq_path_root(struct seq_file *m, const struct path *path,
 		  const struct path *root, const char *esc);
+void seq_mnt_opts(struct seq_file *m, int mnt_flags);
 
 int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
 int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t);
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 19be60ab950e..78deb8483d27 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -4,6 +4,7 @@
 hostprogs := \
 	test-fsinfo \
 	test-fsmount \
+	test-fsinfo-perf \
 	test-mntinfo \
 	test-statx
 
@@ -12,6 +13,7 @@ always-y := $(hostprogs)
 HOSTCFLAGS_test-fsinfo.o += -I$(objtree)/usr/include
 HOSTLDLIBS_test-fsinfo += -static -lm
 HOSTCFLAGS_test-mntinfo.o += -I$(objtree)/usr/include
+HOSTCFLAGS_test-fsinfo-perf.o += -I$(objtree)/usr/include
 
 HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
 HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
diff --git a/samples/vfs/test-fsinfo-perf.c b/samples/vfs/test-fsinfo-perf.c
new file mode 100644
index 000000000000..fba40737f768
--- /dev/null
+++ b/samples/vfs/test-fsinfo-perf.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Test the fsinfo() system call
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@...hat.com)
+ */
+
+#define _GNU_SOURCE
+#define _ATFILE_SOURCE
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <time.h>
+#include <math.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <linux/fsinfo.h>
+
+#ifndef __NR_fsinfo
+#define __NR_fsinfo -1
+#endif
+
+#define ERR(ret, what) do { if ((long)(ret) == -1) { perror(what); exit(1); } } while(0)
+#define OOM(ret) do { if (!(ret)) { perror(NULL); exit(1); } } while(0)
+
+static int nr_mounts = 3;
+static const char *base_path;
+
+static __attribute__((unused))
+ssize_t fsinfo(int dfd, const char *filename,
+	       struct fsinfo_params *params, size_t params_size,
+	       void *result_buffer, size_t result_buf_size)
+{
+	return syscall(__NR_fsinfo, dfd, filename,
+		       params, params_size,
+		       result_buffer, result_buf_size);
+}
+
+static void iterate(void (*func)(int i, const char *))
+{
+	char name[4096];
+	int i;
+
+	for (i = 0; i < nr_mounts; i++) {
+		sprintf(name, "%s/%d", base_path, i);
+		func(i, name);
+	}
+}
+
+static void make_mount(int ix, const char *path)
+{
+	ERR(mkdir(path, 0755), "mkdir");
+	ERR(mount("none", path, "tmpfs", 0, NULL), "mount");
+	ERR(mount("none", path, NULL, MS_PRIVATE, NULL), "mount");
+}
+
+static void do_umount(void)
+{
+	printf("--- umount ---\n");
+	if (umount2(base_path, MNT_DETACH) == -1)
+		perror("umount");
+}
+
+static unsigned long sum_mnt_id;
+
+static void get_mntid_by_fsinfo(int ix, const char *path)
+{
+	struct fsinfo_mount_info r;
+	struct fsinfo_params params = {
+		.flags		= FSINFO_FLAGS_QUERY_PATH,
+		.request	= FSINFO_ATTR_MOUNT_INFO,
+	};
+
+	ERR(fsinfo(AT_FDCWD, path, &params, sizeof(params), &r, sizeof(r)),
+	    "fsinfo");
+	//printf("[%u] %u\n", ix, r.mnt_id);
+	sum_mnt_id += r.mnt_id;
+}
+
+static void get_mntid_by_proc(int ix, const char *path)
+{
+	unsigned int mnt_id;
+	ssize_t len;
+	char procfile[100], buffer[4096], *p, *nl;
+	int fd, fd2;
+
+	fd = open(path, O_PATH);
+	ERR(fd, "open/path");
+	sprintf(procfile, "/proc/self/fdinfo/%u", fd);
+	fd2 = open(procfile, O_RDONLY);
+	ERR(fd2, "open/proc");
+	len = read(fd2, buffer, sizeof(buffer) - 1);
+	ERR(len, "read");
+	buffer[len] = 0;
+	close(fd2);
+	close(fd);
+
+	p = buffer;
+	do {
+		nl = strchr(p, '\n');
+		if (nl)
+			*nl++ = '\0';
+		else
+			nl = NULL;
+
+		if (strncmp(p, "mnt_id:", 7) != 0)
+			continue;
+		p += 7;
+		while (isblank(*p))
+			p++;
+		/* Have to allow for extra numbers being added to the line */
+		if (sscanf(p, "%u", &mnt_id) != 1) {
+			fprintf(stderr, "Bad format %s\n", procfile);
+			exit(3);
+		}
+		break;
+
+	} while ((p = nl));
+
+	if (!p) {
+		fprintf(stderr, "Missing field %s\n", procfile);
+		exit(3);
+	}
+
+	sum_mnt_id += mnt_id;
+	//printf("[%u] %u\n", ix, mnt_id);
+}
+
+static void get_mntid_by_fsinfo_2(void)
+{
+	struct fsinfo_mount_child *children, *c, *end;
+	struct fsinfo_mount_info r;
+	struct fsinfo_params params = {
+		.flags		= FSINFO_FLAGS_QUERY_PATH,
+		.request	= FSINFO_ATTR_MOUNT_INFO,
+	};
+	unsigned int base_mnt_id;
+	size_t s_children, n_children;
+	char name[32];
+	int i;
+
+	/* Convert path to mount ID */
+	ERR(fsinfo(AT_FDCWD, base_path, &params, sizeof(params), &r, sizeof(r)),
+	    "fsinfo/base");
+	base_mnt_id = r.mnt_id;
+	//printf("[B] %u\n", base_mnt_id);
+
+	/* Get a list of all the children of this mount ID */
+	s_children = (nr_mounts + 1) * sizeof(*children);
+	children = malloc(s_children);
+	OOM(children);
+
+	params.flags	= FSINFO_FLAGS_QUERY_MOUNT;
+	params.request	= FSINFO_ATTR_MOUNT_CHILDREN;
+	sprintf(name, "%u", base_mnt_id);
+	s_children = fsinfo(AT_FDCWD, name, &params, sizeof(params), children, s_children);
+	ERR(s_children, "fsinfo/children");
+
+	/* Query each child */
+	n_children = s_children / sizeof(*c) - 1; // Parent is added at end
+	c = children;
+	end = c + n_children;
+	for (i = 0; c < end; c++, i++) {
+		//printf("[%u] %u\n", i, c->mnt_id);
+		params.flags	= FSINFO_FLAGS_QUERY_MOUNT;
+		params.request	= FSINFO_ATTR_MOUNT_INFO;
+		sprintf(name, "%u", c->mnt_id);
+		ERR(fsinfo(AT_FDCWD, name, &params, sizeof(params), &r, sizeof(r)),
+		    "fsinfo/child");
+		sum_mnt_id += r.mnt_id;
+	}
+}
+
+static void get_mntid_by_mountfs(void)
+{
+	unsigned int base_mnt_id, mnt_id, x;
+	ssize_t len, s_children;
+	char procfile[100], buffer[100], *children, *p, *q, *nl, *comma;
+	int fd, fd2, mntfd, i;
+
+	/* Start off by reading the mount ID from the base path */
+	fd = open(base_path, O_PATH);
+	ERR(fd, "open/path");
+	sprintf(procfile, "/proc/self/fdinfo/%u", fd);
+	fd2 = open(procfile, O_RDONLY);
+	ERR(fd2, "open/proc");
+	len = read(fd2, buffer, sizeof(buffer) - 1);
+	ERR(len, "read");
+	buffer[len] = 0;
+	close(fd2);
+	close(fd);
+
+	p = buffer;
+	do {
+		nl = strchr(p, '\n');
+		if (nl)
+			*nl++ = '\0';
+		else
+			nl = NULL;
+
+		if (strncmp(p, "mnt_id:", 7) != 0)
+			continue;
+		p += 7;
+		while (isblank(*p))
+			p++;
+		/* Have to allow for extra numbers being added to the line */
+		if (sscanf(p, "%u", &base_mnt_id) != 1) {
+			fprintf(stderr, "Bad format %s\n", procfile);
+			exit(3);
+		}
+		break;
+
+	} while ((p = nl));
+
+	if (!p) {
+		fprintf(stderr, "Missing field %s\n", procfile);
+		exit(3);
+	}
+
+	if (0) printf("[B] %u\n", base_mnt_id);
+
+	mntfd = open("/mnt", O_PATH);
+	ERR(fd, "open/mountfs");
+
+	/* Get a list of all the children of this mount ID */
+	s_children = (nr_mounts) * 12;
+	children = malloc(s_children);
+	OOM(children);
+
+	sprintf(procfile, "%u/children", base_mnt_id);
+	fd = openat(mntfd, procfile, O_RDONLY);
+	ERR(fd, "open/children");
+	s_children = read(fd, children, s_children - 1);
+	ERR(s_children, "read/children");
+	close(fd);
+	if (s_children > 0 && children[s_children - 1] == '\n')
+		s_children--;
+	children[s_children] = 0;
+
+	/* Query each child */
+	p = children;
+	if (!*p)
+		return;
+	i = 0;
+	do {
+		mnt_id = strtoul(p, &comma, 10);
+		if (*comma) {
+			if (*comma != ',') {
+				fprintf(stderr, "Bad format in mountfs-children\n");
+				exit(3);
+			}
+			comma++;
+		}
+
+		sprintf(procfile, "%u/id", mnt_id);
+		fd = openat(mntfd, procfile, O_RDONLY);
+		ERR(fd, procfile);
+		len = read(fd, buffer, sizeof(buffer) - 1);
+		ERR(len, "read/id");
+		close(fd);
+		if (len > 0 && buffer[len - 1] == '\n')
+			len--;
+		buffer[len] = 0;
+
+		x = strtoul(buffer, &q, 10);
+
+		if (*q) {
+			fprintf(stderr, "Bad format in %s '%s'\n", procfile, buffer);
+			exit(3);
+		}
+
+		if (0) printf("[%u] %u\n", i++, x);
+		sum_mnt_id += x;
+	} while (p = comma, *comma);
+}
+
+static unsigned long duration(struct timeval *before, struct timeval *after)
+{
+	unsigned long a, b;
+
+	a = after->tv_sec * 1000000 + after->tv_usec;
+	b = before->tv_sec * 1000000 + before->tv_usec;
+	return a - b;
+}
+
+int main(int argc, char **argv)
+{
+	struct timeval f_before, f_after;
+	struct timeval f2_before, f2_after;
+	struct timeval p_before, p_after;
+	struct timeval p2_before, p2_after;
+	const char *path;
+	unsigned long f_dur, f2_dur, p_dur, p2_dur;
+
+	if (argc < 2) {
+		fprintf(stderr, "Format: %s <path> [nr_mounts]\n", argv[0]);
+		exit(2);
+	}
+
+	if (argc == 3)
+		nr_mounts = atoi(argv[2]);
+
+	path = argv[1];
+	ERR(mount("none", path, "tmpfs", 0, NULL), "mount");
+	ERR(mount("none", path, NULL, MS_PRIVATE, NULL), "mount");
+	base_path = path;
+	atexit(do_umount);
+
+	printf("--- make mounts ---\n");
+	iterate(make_mount);
+
+	printf("--- test fsinfo by path ---\n");
+	sum_mnt_id = 0;
+	ERR(gettimeofday(&f_before, NULL), "gettimeofday");
+	iterate(get_mntid_by_fsinfo);
+	ERR(gettimeofday(&f_after, NULL), "gettimeofday");
+	printf("sum(mnt_id) = %lu\n", sum_mnt_id);
+
+	printf("--- test fsinfo by mnt_id ---\n");
+	sum_mnt_id = 0;
+	ERR(gettimeofday(&f2_before, NULL), "gettimeofday");
+	get_mntid_by_fsinfo_2();
+	ERR(gettimeofday(&f2_after, NULL), "gettimeofday");
+	printf("sum(mnt_id) = %lu\n", sum_mnt_id);
+
+	printf("--- test /proc/fdinfo ---\n");
+	sum_mnt_id = 0;
+	ERR(gettimeofday(&p_before, NULL), "gettimeofday");
+	iterate(get_mntid_by_proc);
+	ERR(gettimeofday(&p_after, NULL), "gettimeofday");
+	printf("sum(mnt_id) = %lu\n", sum_mnt_id);
+
+	printf("--- test mountfs ---\n");
+	sum_mnt_id = 0;
+	ERR(gettimeofday(&p2_before, NULL), "gettimeofday");
+	get_mntid_by_mountfs();
+	ERR(gettimeofday(&p2_after, NULL), "gettimeofday");
+	printf("sum(mnt_id) = %lu\n", sum_mnt_id);
+
+	f_dur  = duration(&f_before,  &f_after);
+	f2_dur = duration(&f2_before, &f2_after);
+	p_dur  = duration(&p_before,  &p_after);
+	p2_dur = duration(&p2_before, &p2_after);
+	//printf("fsinfo duration %10luus for %d mounts\n", f_dur, nr_mounts);
+	//printf("procfd duration %10luus for %d mounts\n", p_dur, nr_mounts);
+
+	printf("For %7d mounts, f=%10luus f2=%10luus p=%10luus p2=%10luus; p=%.1f*f p=%.1f*f2 p=%.1f*p2\n",
+	       nr_mounts, f_dur, f2_dur, p_dur, p2_dur,
+	       (double)p_dur / (double)f_dur,
+	       (double)p_dur / (double)f2_dur,
+	       (double)p_dur / (double)p2_dur);
+	return 0;
+}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ