[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240805093245.889357-2-jgowans@amazon.com>
Date: Mon, 5 Aug 2024 11:32:36 +0200
From: James Gowans <jgowans@...zon.com>
To: <linux-kernel@...r.kernel.org>
CC: James Gowans <jgowans@...zon.com>, Sean Christopherson
<seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>, Alexander Viro
<viro@...iv.linux.org.uk>, Steve Sistare <steven.sistare@...cle.com>,
Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>, "Anthony
Yznaga" <anthony.yznaga@...cle.com>, Mike Rapoport <rppt@...nel.org>, "Andrew
Morton" <akpm@...ux-foundation.org>, <linux-mm@...ck.org>, Jason Gunthorpe
<jgg@...pe.ca>, <linux-fsdevel@...r.kernel.org>, Usama Arif
<usama.arif@...edance.com>, <kvm@...r.kernel.org>, Alexander Graf
<graf@...zon.com>, David Woodhouse <dwmw@...zon.co.uk>, Paul Durrant
<pdurrant@...zon.co.uk>, Nicolas Saenz Julienne <nsaenz@...zon.es>
Subject: [PATCH 01/10] guestmemfs: Introduce filesystem skeleton
Add an in-memory filesystem: guestmemfs. Memory is donated to guestmemfs
by carving it out of the normal System RAM range with the memmap= cmdline
parameter and then giving that same physical range to guestmemfs with the
guestmemfs= cmdline parameter.
A new filesystem is added; so far it doesn't do much except persist a
super block at the start of the donated memory and allows itself to be
mounted.
A hook to x86 mm init is added to reserve the memory really early on via
memblock allocator. There is probably a better arch-independent place to
do this...
Signed-off-by: James Gowans <jgowans@...zon.com>
---
arch/x86/mm/init_64.c | 2 +
fs/Kconfig | 1 +
fs/Makefile | 1 +
fs/guestmemfs/Kconfig | 11 ++++
fs/guestmemfs/Makefile | 6 ++
fs/guestmemfs/guestmemfs.c | 116 +++++++++++++++++++++++++++++++++++++
fs/guestmemfs/guestmemfs.h | 9 +++
include/linux/guestmemfs.h | 16 +++++
8 files changed, 162 insertions(+)
create mode 100644 fs/guestmemfs/Kconfig
create mode 100644 fs/guestmemfs/Makefile
create mode 100644 fs/guestmemfs/guestmemfs.c
create mode 100644 fs/guestmemfs/guestmemfs.h
create mode 100644 include/linux/guestmemfs.h
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8932ba8f5cdd..39fcf017c90c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
+#include <linux/guestmemfs.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/kexec.h>
@@ -1331,6 +1332,7 @@ static void __init preallocate_vmalloc_pages(void)
void __init mem_init(void)
{
+ guestmemfs_reserve_mem();
pci_iommu_alloc();
/* clear_bss() already clear the empty_zero_page */
diff --git a/fs/Kconfig b/fs/Kconfig
index a46b0cbc4d8f..727359901da8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -321,6 +321,7 @@ source "fs/befs/Kconfig"
source "fs/bfs/Kconfig"
source "fs/efs/Kconfig"
source "fs/jffs2/Kconfig"
+source "fs/guestmemfs/Kconfig"
# UBIFS File system configuration
source "fs/ubifs/Kconfig"
source "fs/cramfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 6ecc9b0a53f2..044524b17d63 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_VBOXSF_FS) += vboxsf/
obj-$(CONFIG_ZONEFS_FS) += zonefs/
+obj-$(CONFIG_GUESTMEMFS_FS) += guestmemfs/
diff --git a/fs/guestmemfs/Kconfig b/fs/guestmemfs/Kconfig
new file mode 100644
index 000000000000..d87fca4822cb
--- /dev/null
+++ b/fs/guestmemfs/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config GUESTMEMFS_FS
+ bool "Persistent Guest memory filesystem (guestmemfs)"
+ help
+ An in-memory filesystem on top of reserved memory specified via
+ guestmemfs= cmdline argument. Used for storing kernel state and
+ userspace memory which is preserved across kexec to support
+ live update of a hypervisor when running guest virtual machines.
+ Select this if you need the ability to persist memory for guest VMs
+ across kexec to do live update.
diff --git a/fs/guestmemfs/Makefile b/fs/guestmemfs/Makefile
new file mode 100644
index 000000000000..6dc820a9d4fe
--- /dev/null
+++ b/fs/guestmemfs/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for persistent kernel filesystem
+#
+
+obj-y += guestmemfs.o
diff --git a/fs/guestmemfs/guestmemfs.c b/fs/guestmemfs/guestmemfs.c
new file mode 100644
index 000000000000..3aaada1b8df6
--- /dev/null
+++ b/fs/guestmemfs/guestmemfs.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "guestmemfs.h"
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/fs_context.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/statfs.h>
+
+static phys_addr_t guestmemfs_base, guestmemfs_size;
+struct guestmemfs_sb *psb;
+
+static int statfs(struct dentry *root, struct kstatfs *buf)
+{
+ simple_statfs(root, buf);
+ buf->f_bsize = PMD_SIZE;
+ buf->f_blocks = guestmemfs_size / PMD_SIZE;
+ buf->f_bfree = buf->f_bavail = buf->f_blocks;
+ return 0;
+}
+
+static const struct super_operations guestmemfs_super_ops = {
+ .statfs = statfs,
+};
+
+static int guestmemfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct inode *inode;
+ struct dentry *dentry;
+
+ psb = kzalloc(sizeof(*psb), GFP_KERNEL);
+ /*
+ * Keep a reference to the persistent super block in the
+ * ephemeral super block.
+ */
+ sb->s_fs_info = psb;
+ sb->s_op = &guestmemfs_super_ops;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return -ENOMEM;
+
+ inode->i_ino = 1;
+ inode->i_mode = S_IFDIR;
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ simple_inode_init_ts(inode);
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+
+ dentry = d_make_root(inode);
+ if (!dentry)
+ return -ENOMEM;
+ sb->s_root = dentry;
+
+ return 0;
+}
+
+static int guestmemfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, guestmemfs_fill_super);
+}
+
+static const struct fs_context_operations guestmemfs_context_ops = {
+ .get_tree = guestmemfs_get_tree,
+};
+
+static int guestmemfs_init_fs_context(struct fs_context *const fc)
+{
+ fc->ops = &guestmemfs_context_ops;
+ return 0;
+}
+
+static struct file_system_type guestmemfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "guestmemfs",
+ .init_fs_context = guestmemfs_init_fs_context,
+ .kill_sb = kill_litter_super,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+
+static int __init guestmemfs_init(void)
+{
+ int ret;
+
+ ret = register_filesystem(&guestmemfs_fs_type);
+ return ret;
+}
+
+/**
+ * Format: guestmemfs=<size>:<base>
+ * Just like: memmap=nn[KMG]!ss[KMG]
+ */
+static int __init parse_guestmemfs_extents(char *p)
+{
+ guestmemfs_size = memparse(p, &p);
+ return 0;
+}
+
+early_param("guestmemfs", parse_guestmemfs_extents);
+
+void __init guestmemfs_reserve_mem(void)
+{
+ guestmemfs_base = memblock_phys_alloc(guestmemfs_size, 4 << 10);
+ if (guestmemfs_base) {
+ memblock_reserved_mark_noinit(guestmemfs_base, guestmemfs_size);
+ memblock_mark_nomap(guestmemfs_base, guestmemfs_size);
+ } else {
+ pr_warn("Failed to alloc %llu bytes for guestmemfs\n", guestmemfs_size);
+ }
+}
+
+MODULE_ALIAS_FS("guestmemfs");
+module_init(guestmemfs_init);
diff --git a/fs/guestmemfs/guestmemfs.h b/fs/guestmemfs/guestmemfs.h
new file mode 100644
index 000000000000..37d8cf630e0a
--- /dev/null
+++ b/fs/guestmemfs/guestmemfs.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define pr_fmt(fmt) "guestmemfs: " KBUILD_MODNAME ": " fmt
+
+#include <linux/guestmemfs.h>
+
+struct guestmemfs_sb {
+ /* Will be populated soon... */
+};
diff --git a/include/linux/guestmemfs.h b/include/linux/guestmemfs.h
new file mode 100644
index 000000000000..60e769c8e533
--- /dev/null
+++ b/include/linux/guestmemfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef _LINUX_GUESTMEMFS_H
+#define _LINUX_GUESTMEMFS_H
+
+/*
+ * Carves out chunks of memory from memblocks for guestmemfs.
+ * Must be called in early boot before memblocks are freed.
+ */
+# ifdef CONFIG_GUESTMEMFS_FS
+void guestmemfs_reserve_mem(void);
+#else
+void guestmemfs_reserve_mem(void) { }
+#endif
+
+#endif
--
2.34.1
Powered by blists - more mailing lists