lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <32784ee26d895bae2484e15fef205d5590720c4b.1642526745.git.khalid.aziz@oracle.com>
Date:   Tue, 18 Jan 2022 14:19:14 -0700
From:   Khalid Aziz <khalid.aziz@...cle.com>
To:     akpm@...ux-foundation.org, willy@...radead.org,
        longpeng2@...wei.com, arnd@...db.de, dave.hansen@...ux.intel.com,
        david@...hat.com, rppt@...nel.org, surenb@...gle.com,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc:     Khalid Aziz <khalid.aziz@...cle.com>
Subject: [RFC PATCH 2/6] mm: Add msharefs filesystem

Add a RAM-based filesystem that contains the files created for each
shared address range. This patch adds only the filesystem and the
creation of files. Page table entries for the shared ranges created by
the mshare syscall are not yet shared.

Signed-off-by: Khalid Aziz <khalid.aziz@...cle.com>
---
 Documentation/filesystems/msharefs.rst |  19 +++
 include/uapi/linux/magic.h             |   1 +
 mm/mshare.c                            | 191 +++++++++++++++++++++++--
 3 files changed, 197 insertions(+), 14 deletions(-)
 create mode 100644 Documentation/filesystems/msharefs.rst

diff --git a/Documentation/filesystems/msharefs.rst b/Documentation/filesystems/msharefs.rst
new file mode 100644
index 000000000000..fd161f67045d
--- /dev/null
+++ b/Documentation/filesystems/msharefs.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+msharefs - a filesystem to support shared page tables
+=====================================================
+
+msharefs is a ram-based filesystem that allows multiple processes to
+share page table entries for shared pages.
+
+msharefs is typically mounted like this::
+
+	mount -t msharefs none /sys/fs/mshare
+
+When a process calls the mshare syscall with a name for the shared
+address range, a file with that name is created under msharefs.
+This file can be opened by another process, if permissions
+allow, to query the addresses shared under this range. These files are
+removed by the mshare_unlink syscall and cannot be deleted directly.
+Hence these files are created as immutable files.
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 35687dcb1a42..26a12e33a3c1 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -98,5 +98,6 @@
 #define Z3FOLD_MAGIC		0x33
 #define PPC_CMM_MAGIC		0xc7571590
 #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
+#define MSHARE_MAGIC		0x4d534852	/* "MSHR" */
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/mshare.c b/mm/mshare.c
index c723f8369f06..e48d0f615f9f 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -10,20 +10,117 @@
  *		Khalid Aziz
  */
 
-#include <linux/anon_inodes.h>
 #include <linux/fs.h>
+#include <linux/mount.h>
 #include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pseudo_fs.h>
+#include <linux/fileattr.h>
+#include <uapi/linux/magic.h>
+#include <uapi/linux/limits.h>
 
-static const struct file_operations mshare_fops = {
+static struct super_block *msharefs_sb;
+
+/*
+ * File operations for msharefs files: open is handled by simple_open()
+ * and llseek by no_llseek(). No other handlers are set.
+ */
+static const struct file_operations msharefs_file_operations = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
 };
 
+/*
+ * Compute the hash of the name in @qstr, seeded through
+ * init_name_hash(@dentry), and store it in @qstr->hash.
+ *
+ * Always returns 0.
+ */
+static int
+msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	const unsigned char *name = qstr->name;
+	const unsigned int name_len = qstr->len;
+	unsigned long h = init_name_hash(dentry);
+	unsigned int i;
+
+	/* Fold the name into the running hash one byte at a time. */
+	for (i = 0; i < name_len; i++)
+		h = partial_name_hash(name[i], h);
+
+	qstr->hash = end_name_hash(h);
+	return 0;
+}
+
+/*
+ * Allocate a dentry called @name under @parent. @name must be a
+ * NUL-terminated string; its length is taken with strlen().
+ *
+ * Returns the new dentry, or an ERR_PTR(): the error from
+ * msharefs_d_hash() if hashing fails, -ENOMEM if d_alloc() fails.
+ */
+static struct dentry
+*msharefs_alloc_dentry(struct dentry *parent, const char *name)
+{
+	struct qstr qname;
+	struct dentry *dentry;
+	int ret;
+
+	qname.name = name;
+	qname.len = strlen(name);
+
+	/* The name must be hashed before it is handed to d_alloc(). */
+	ret = msharefs_d_hash(parent, &qname);
+	if (ret)
+		return ERR_PTR(ret);
+
+	dentry = d_alloc(parent, &qname);
+	if (!dentry)
+		return ERR_PTR(-ENOMEM);
+
+	return dentry;
+}
+
+/*
+ * Allocate a new inode on @sb and initialise it for msharefs with the
+ * given @mode. Ownership comes from the caller's fsuid/fsgid and all
+ * three timestamps are set to the current time.
+ *
+ * Returns the new inode, or NULL if new_inode() fails.
+ */
+static struct inode
+*msharefs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *ino = new_inode(sb);
+
+	if (!ino)
+		return NULL;
+
+	ino->i_ino = get_next_ino();
+	ino->i_mode = mode;
+
+	/*
+	 * Userspace is not supposed to modify msharefs files, only read
+	 * them, so every inode is flagged immutable.
+	 */
+	ino->i_flags = S_IMMUTABLE;
+
+	ino->i_ctime = current_time(ino);
+	ino->i_mtime = ino->i_ctime;
+	ino->i_atime = ino->i_ctime;
+
+	ino->i_fop = &msharefs_file_operations;
+	ino->i_size = 0;
+	ino->i_uid = current_fsuid();
+	ino->i_gid = current_fsgid();
+
+	return ino;
+}
+
+/*
+ * Create a regular file called @name, readable only by its owner
+ * (mode 0400), in the root directory of msharefs.
+ *
+ * @name must be a NUL-terminated string. @flags is currently unused.
+ *
+ * Returns 0 on success, -ENODEV if no msharefs superblock has been set
+ * up yet, -ENOMEM if the inode cannot be allocated, or the error
+ * reported by msharefs_alloc_dentry().
+ */
+static int
+mshare_file_create(const char *name, unsigned long flags)
+{
+	struct inode *inode;
+	struct dentry *root, *dentry;
+	int err;
+
+	/*
+	 * msharefs_sb is only assigned by msharefs_fill_super(), so it is
+	 * still NULL until the filesystem has been mounted.
+	 */
+	if (!msharefs_sb)
+		return -ENODEV;
+
+	root = msharefs_sb->s_root;
+
+	/*
+	 * msharefs_get_inode() returns NULL, not an ERR_PTR(), when
+	 * new_inode() fails. IS_ERR() alone would miss that and let
+	 * d_add() attach a NULL inode to the dentry.
+	 */
+	inode = msharefs_get_inode(msharefs_sb, S_IFREG | 0400);
+	if (IS_ERR_OR_NULL(inode))
+		return inode ? PTR_ERR(inode) : -ENOMEM;
+
+	dentry = msharefs_alloc_dentry(root, name);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto fail_inode;
+	}
+
+	d_add(dentry, inode);
+
+	return 0;
+
+fail_inode:
+	/* Drop the inode reference taken above; no dentry holds it. */
+	iput(inode);
+	return err;
+}
+
 /*
- * mshare syscall. Returns a file descriptor
+ * mshare syscall
  */
-SYSCALL_DEFINE5(mshare, const char *, name, unsigned long, addr,
+SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 		unsigned long, len, int, oflag, mode_t, mode)
 {
-	int fd;
+	char mshare_name[NAME_MAX];
+	int err;
 
 	/*
 	 * Address range being shared must be aligned to pgdir
@@ -32,15 +129,14 @@ SYSCALL_DEFINE5(mshare, const char *, name, unsigned long, addr,
 	if ((addr | len) & (PGDIR_SIZE - 1))
 		return -EINVAL;
 
-	/*
-	 * Allocate a file descriptor to return
-	 *
-	 * TODO: This code ignores the object name completely. Add
-	 * support for that
-	 */
-	fd = anon_inode_getfd("mshare", &mshare_fops, NULL, O_RDWR);
+	err = copy_from_user(mshare_name, name, NAME_MAX);
+	if (err)
+		goto err_out;
 
-	return fd;
+	err = mshare_file_create(mshare_name, oflag);
+
+err_out:
+	return err;
 }
 
 /*
@@ -48,7 +144,8 @@ SYSCALL_DEFINE5(mshare, const char *, name, unsigned long, addr,
  */
 SYSCALL_DEFINE1(mshare_unlink, const char *, name)
 {
-	int fd;
+	char mshare_name[NAME_MAX];
+	int err;
 
 	/*
 	 * Delete the named object
@@ -56,5 +153,71 @@ SYSCALL_DEFINE1(mshare_unlink, const char *, name)
 	 * TODO: Mark mshare'd range for deletion
 	 *
 	 */
+	err = copy_from_user(mshare_name, name, NAME_MAX);
+	if (err)
+		goto err_out;
+	return 0;
+
+err_out:
+	return err;
+}
+
+/* Dentry operations for msharefs: only a custom ->d_hash is provided. */
+static const struct dentry_operations msharefs_d_ops = {
+	.d_hash = msharefs_d_hash,
+};
+
+/*
+ * Fill in the msharefs superblock: install the msharefs dentry
+ * operations, populate it through simple_fill_super() with an empty
+ * file table, and on success remember the superblock in msharefs_sb.
+ *
+ * @fc is not used. Returns 0 on success or the error from
+ * simple_fill_super().
+ */
+static int
+msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	static const struct tree_descr no_files = {""};
+	int ret;
+
+	sb->s_d_op = &msharefs_d_ops;
+
+	ret = simple_fill_super(sb, MSHARE_MAGIC, &no_files);
+	if (!ret)
+		msharefs_sb = sb;
+
+	return ret;
+}
+
+/*
+ * ->get_tree handler: defers to get_tree_single() with
+ * msharefs_fill_super() as the superblock fill callback.
+ */
+static int
+msharefs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, msharefs_fill_super);
+}
+
+/* fs_context operations for msharefs; only ->get_tree is set. */
+static const struct fs_context_operations msharefs_context_ops = {
+	.get_tree	= msharefs_get_tree,
+};
+
+/*
+ * ->init_fs_context handler for msharefs: installs
+ * msharefs_context_ops on the new context. Always returns 0.
+ */
+static int
+mshare_init_fs_context(struct fs_context *fc)
+{
+	fc->ops = &msharefs_context_ops;
 	return 0;
 }
+
+/*
+ * Filesystem type registered under the name "msharefs". Contexts are
+ * set up by mshare_init_fs_context() and superblocks are torn down
+ * with kill_litter_super().
+ */
+static struct file_system_type mshare_fs = {
+	.name			= "msharefs",
+	.init_fs_context	= mshare_init_fs_context,
+	.kill_sb		= kill_litter_super,
+};
+
+/*
+ * Initcall: create the "mshare" mount point under fs_kobj and register
+ * the msharefs filesystem type. If registration fails, the mount point
+ * is removed again.
+ *
+ * Returns 0 on success or the error from whichever step failed.
+ */
+static int
+mshare_init(void)
+{
+	int err;
+
+	err = sysfs_create_mount_point(fs_kobj, "mshare");
+	if (!err) {
+		err = register_filesystem(&mshare_fs);
+		if (err)
+			sysfs_remove_mount_point(fs_kobj, "mshare");
+	}
+
+	return err;
+}
+
+fs_initcall(mshare_init);
-- 
2.32.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ