[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <de5566e71e038d95342d00364c6760c7078cb091.1656531090.git.khalid.aziz@oracle.com>
Date: Wed, 29 Jun 2022 16:53:52 -0600
From: Khalid Aziz <khalid.aziz@...cle.com>
To: akpm@...ux-foundation.org, willy@...radead.org
Cc: Khalid Aziz <khalid.aziz@...cle.com>, aneesh.kumar@...ux.ibm.com,
arnd@...db.de, 21cnbao@...il.com, corbet@....net,
dave.hansen@...ux.intel.com, david@...hat.com,
ebiederm@...ssion.com, hagen@...u.net, jack@...e.cz,
keescook@...omium.org, kirill@...temov.name, kucharsk@...il.com,
linkinjeon@...nel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
longpeng2@...wei.com, luto@...nel.org, markhemm@...glemail.com,
pcc@...gle.com, rppt@...nel.org, sieberf@...zon.com,
sjpark@...zon.de, surenb@...gle.com, tst@...oebel-theuer.de,
yzaikin@...gle.com
Subject: [PATCH v2 1/9] mm: Add msharefs filesystem
Add a ram-based filesystem that contains page table sharing
information and files that enable processes to share page tables.
This patch adds the basic filesystem that can be mounted.
Signed-off-by: Khalid Aziz <khalid.aziz@...cle.com>
---
Documentation/filesystems/msharefs.rst | 19 +++++
include/uapi/linux/magic.h | 1 +
mm/Makefile | 2 +-
mm/mshare.c | 103 +++++++++++++++++++++++++
4 files changed, 124 insertions(+), 1 deletion(-)
create mode 100644 Documentation/filesystems/msharefs.rst
create mode 100644 mm/mshare.c
diff --git a/Documentation/filesystems/msharefs.rst b/Documentation/filesystems/msharefs.rst
new file mode 100644
index 000000000000..fd161f67045d
--- /dev/null
+++ b/Documentation/filesystems/msharefs.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+msharefs - a filesystem to support shared page tables
+=====================================================
+
+msharefs is a ram-based filesystem that allows multiple processes to
+share page table entries for shared pages.
+
+msharefs is typically mounted like this::
+
+ mount -t msharefs none /sys/fs/mshare
+
+When a process calls the mshare syscall with a name for the shared
+address range, a file with that name is created under msharefs.
+This file can be opened by another process, if permissions
+allow, to query the addresses shared under this range. These files are
+removed by the mshare_unlink syscall and cannot be deleted directly.
+Hence these files are created as immutable files.
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index f724129c0425..2a57a6ec6f3e 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -105,5 +105,6 @@
#define Z3FOLD_MAGIC 0x33
#define PPC_CMM_MAGIC 0xc7571590
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
+#define MSHARE_MAGIC 0x4d534852 /* "MSHR" */
#endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/Makefile b/mm/Makefile
index 6f9ffa968a1a..51a2ab9080d9 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -37,7 +37,7 @@ CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o mshare.o \
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
msync.o page_vma_mapped.o pagewalk.o \
pgtable-generic.o rmap.o vmalloc.o
diff --git a/mm/mshare.c b/mm/mshare.c
new file mode 100644
index 000000000000..c8fab3869bab
--- /dev/null
+++ b/mm/mshare.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Enable cooperating processes to share page tables between
+ * them to reduce the extra memory consumed by multiple copies
+ * of page tables.
+ *
+ * This code adds an in-memory filesystem - msharefs.
+ * msharefs is used to manage page table sharing.
+ *
+ *
+ * Copyright (C) 2022 Oracle Corp. All rights reserved.
+ * Author: Khalid Aziz <khalid.aziz@...cle.com>
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pseudo_fs.h>
+#include <linux/fileattr.h>
+#include <uapi/linux/magic.h>
+#include <uapi/linux/limits.h>
+
+static struct super_block *msharefs_sb; /* assigned by msharefs_fill_super() after simple_fill_super() succeeds */
+
+/*
+ * File operations for msharefs files: open goes through simple_open() and
+ * llseek is wired to no_llseek. Nothing else in the visible code references
+ * this table yet.
+ */
+static const struct file_operations msharefs_file_operations = {
+	.llseek		= no_llseek,
+	.open		= simple_open,
+};
+
+/*
+ * ->d_hash callback: compute the hash of the name in @qstr.
+ *
+ * Seeds the hash from @dentry with init_name_hash(), folds in each of the
+ * qstr->len bytes of qstr->name with partial_name_hash(), and stores the
+ * finalised value in qstr->hash.
+ *
+ * Always returns 0.
+ */
+static int
+msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	const unsigned char *name = qstr->name;
+	const unsigned int nbytes = qstr->len;
+	unsigned long acc = init_name_hash(dentry);
+	unsigned int i;
+
+	for (i = 0; i < nbytes; i++)
+		acc = partial_name_hash(name[i], acc);
+
+	qstr->hash = end_name_hash(acc);
+	return 0;
+}
+
+static const struct dentry_operations msharefs_d_ops = {
+ .d_hash = msharefs_d_hash, /* table is installed as sb->s_d_op by msharefs_fill_super() */
+};
+
+/*
+ * Fill callback handed to get_tree_single(): set up a new msharefs
+ * superblock.
+ *
+ * Installs msharefs_d_ops as the superblock's dentry operations, then lets
+ * simple_fill_super() build the tree from a descriptor whose only entry is
+ * the empty name, using MSHARE_MAGIC as the magic number. On success the
+ * superblock is remembered in msharefs_sb.
+ *
+ * Returns 0 on success or the error from simple_fill_super(). @fc is unused.
+ *
+ * NOTE(review): nothing in the visible code resets msharefs_sb when the
+ * superblock is torn down (kill_sb is kill_litter_super) - confirm that no
+ * later user can observe a stale pointer.
+ */
+static int
+msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	static const struct tree_descr no_files = {""};
+	int ret;
+
+	sb->s_d_op = &msharefs_d_ops;
+
+	ret = simple_fill_super(sb, MSHARE_MAGIC, &no_files);
+	if (!ret)
+		msharefs_sb = sb;
+
+	return ret;
+}
+
+static int
+msharefs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, msharefs_fill_super); /* msharefs_fill_super() is the fill callback; its caller's result is returned as-is */
+}
+
+static const struct fs_context_operations msharefs_context_ops = {
+ .get_tree = msharefs_get_tree, /* table is attached to the fs_context by mshare_init_fs_context() */
+};
+
+static int
+mshare_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &msharefs_context_ops; /* only .get_tree is provided by this table */
+ return 0; /* no failure path: always reports success */
+}
+
+/*
+ * Filesystem type handed to register_filesystem() by mshare_init(); it is
+ * registered under the name "msharefs". New mount contexts are initialised
+ * by mshare_init_fs_context() and superblocks are released with
+ * kill_litter_super().
+ */
+static struct file_system_type mshare_fs = {
+	.name			= "msharefs",
+	.kill_sb		= kill_litter_super,
+	.init_fs_context	= mshare_init_fs_context,
+};
+
+/*
+ * Boot-time setup for msharefs.
+ *
+ * Creates the "mshare" mount point under fs_kobj and then registers the
+ * msharefs filesystem type. If registration fails, the mount point is
+ * removed again so that no half-initialised state is left behind.
+ *
+ * Only ever invoked through fs_initcall(), hence the __init annotation.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int __init
+mshare_init(void)
+{
+	int ret;
+
+	ret = sysfs_create_mount_point(fs_kobj, "mshare");
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&mshare_fs);
+	if (ret)
+		sysfs_remove_mount_point(fs_kobj, "mshare");
+
+	return ret;
+}
+
+fs_initcall(mshare_init);
--
2.32.0
Powered by blists - more mailing lists