lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250307005830.65293-6-ptyadav@amazon.de>
Date: Fri, 7 Mar 2025 00:57:39 +0000
From: Pratyush Yadav <ptyadav@...zon.de>
To: <linux-kernel@...r.kernel.org>
CC: Pratyush Yadav <ptyadav@...zon.de>, Jonathan Corbet <corbet@....net>,
	"Eric Biederman" <ebiederm@...ssion.com>, Arnd Bergmann <arnd@...db.de>,
	"Greg Kroah-Hartman" <gregkh@...uxfoundation.org>, Alexander Viro
	<viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>, Jan Kara
	<jack@...e.cz>, Hugh Dickins <hughd@...gle.com>, Alexander Graf
	<graf@...zon.com>, Benjamin Herrenschmidt <benh@...nel.crashing.org>, "David
 Woodhouse" <dwmw2@...radead.org>, James Gowans <jgowans@...zon.com>, "Mike
 Rapoport" <rppt@...nel.org>, Paolo Bonzini <pbonzini@...hat.com>, "Pasha
 Tatashin" <tatashin@...gle.com>, Anthony Yznaga <anthony.yznaga@...cle.com>,
	Dave Hansen <dave.hansen@...el.com>, David Hildenbrand <david@...hat.com>,
	Jason Gunthorpe <jgg@...dia.com>, Matthew Wilcox <willy@...radead.org>, "Wei
 Yang" <richard.weiyang@...il.com>, Andrew Morton <akpm@...ux-foundation.org>,
	<linux-fsdevel@...r.kernel.org>, <linux-doc@...r.kernel.org>,
	<linux-mm@...ck.org>, <kexec@...ts.infradead.org>
Subject: [RFC PATCH 5/5] mm/memfd: allow preserving FD over FDBOX + KHO

For applications with a large amount of memory that takes time to
rebuild, reboots to pick up kernel upgrades can be very expensive. FDBox
allows preserving file descriptors over kexec using KHO. Combining that
with memfd gives those applications reboot-persistent memory that they
can use to quickly save and reconstruct their state.

While memfd can be backed by either hugetlbfs or shmem, this patch only
adds support for shmem. Allow saving and restoring shmem-backed memfds
over FDBOX + KHO.

The memfd FDT node itself does not contain much information. It just
creates a subnode and passes it over to shmem to do its thing. Similar
behaviour is followed on the restore side.

Since there are now two paths of getting a shmem file, refactor the file
setup into its own function called memfd_setup_file(). It sets up the
file flags, mode, etc., and sets fdbox ops if enabled.

Signed-off-by: Pratyush Yadav <ptyadav@...zon.de>
---
 mm/memfd.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 116 insertions(+), 12 deletions(-)

diff --git a/mm/memfd.c b/mm/memfd.c
index 37f7be57c2f50..1c32e66197f6d 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -7,6 +7,8 @@
  * This file is released under the GPL.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/pagemap.h>
@@ -19,8 +21,12 @@
 #include <linux/shmem_fs.h>
 #include <linux/memfd.h>
 #include <linux/pid_namespace.h>
+#include <linux/fdbox.h>
+#include <linux/libfdt.h>
 #include <uapi/linux/memfd.h>
 
+static const struct fdbox_file_ops memfd_fdbox_fops;
+
 /*
  * We need a tag: a new tag would expand every xa_node by 8 bytes,
  * so reuse a tag which we firmly believe is never set or cleared on tmpfs
@@ -418,21 +424,10 @@ static char *alloc_name(const char __user *uname)
 	return ERR_PTR(error);
 }
 
-static struct file *alloc_file(const char *name, unsigned int flags)
+static void memfd_setup_file(struct file *file, unsigned int flags)
 {
 	unsigned int *file_seals;
-	struct file *file;
 
-	if (flags & MFD_HUGETLB) {
-		file = hugetlb_file_setup(name, 0, VM_NORESERVE,
-					HUGETLB_ANONHUGE_INODE,
-					(flags >> MFD_HUGE_SHIFT) &
-					MFD_HUGE_MASK);
-	} else {
-		file = shmem_file_setup(name, 0, VM_NORESERVE);
-	}
-	if (IS_ERR(file))
-		return file;
 	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 	file->f_flags |= O_LARGEFILE;
 
@@ -452,6 +447,27 @@ static struct file *alloc_file(const char *name, unsigned int flags)
 			*file_seals &= ~F_SEAL_SEAL;
 	}
 
+#if defined(CONFIG_FDBOX) && defined(CONFIG_KEXEC_HANDOVER)
+	file->f_fdbox_op = &memfd_fdbox_fops;
+#endif
+}
+
+/*
+ * Create the backing file for a new memfd.
+ *
+ * When MFD_HUGETLB is set in @flags the file comes from
+ * hugetlb_file_setup(), which is handed the
+ * (flags >> MFD_HUGE_SHIFT) & MFD_HUGE_MASK bits; otherwise it comes from
+ * shmem_file_setup(). A successfully created file is then configured by
+ * memfd_setup_file().
+ *
+ * Returns the new file, or the ERR_PTR() value from the setup call.
+ */
+static struct file *alloc_file(const char *name, unsigned int flags)
+{
+	const unsigned int huge_bits = (flags >> MFD_HUGE_SHIFT) & MFD_HUGE_MASK;
+	struct file *file;
+
+	file = (flags & MFD_HUGETLB) ?
+		hugetlb_file_setup(name, 0, VM_NORESERVE,
+				   HUGETLB_ANONHUGE_INODE, huge_bits) :
+		shmem_file_setup(name, 0, VM_NORESERVE);
+
+	/* Only a real file gets the memfd mode/flags/fdbox setup. */
+	if (!IS_ERR(file))
+		memfd_setup_file(file, flags);
+
 	return file;
 }
 
@@ -493,3 +509,91 @@ SYSCALL_DEFINE2(memfd_create,
 	kfree(name);
 	return error;
 }
+
+#if defined(CONFIG_FDBOX) && defined(CONFIG_KEXEC_HANDOVER)
+static const char memfd_fdbox_compatible[] = "fdbox,memfd-v1";
+
+/*
+ * Rebuild a memfd file from its FDBox KHO node.
+ *
+ * @fdt:    flattened device tree blob containing the node.
+ * @offset: offset of the memfd node within @fdt.
+ *
+ * The node must match memfd_fdbox_compatible and contain exactly one
+ * subnode. If is_node_shmem() accepts that subnode, the file is recovered
+ * through shmem_fdbox_kho_recover() and then run through
+ * memfd_setup_file() with flags 0.
+ *
+ * Returns the recovered file, or NULL on any failure.
+ */
+static struct file *memfd_fdbox_kho_recover(const void *fdt, int offset)
+{
+	struct file *memfd;
+	int storage;
+
+	if (fdt_node_check_compatible(fdt, offset, memfd_fdbox_compatible)) {
+		pr_err("kho: invalid compatible\n");
+		return NULL;
+	}
+
+	/* Exactly one subnode is expected: the underlying storage. */
+	storage = fdt_first_subnode(fdt, offset);
+	if (storage < 0) {
+		pr_err("kho: no subnode for underlying storage found!\n");
+		return NULL;
+	}
+	if (fdt_next_subnode(fdt, storage) >= 0) {
+		pr_err("kho: too many subnodes. Expected only 1.\n");
+		return NULL;
+	}
+
+	/* Anything other than a shmem subnode cannot be recovered here. */
+	if (!is_node_shmem(fdt, storage))
+		return NULL;
+
+	memfd = shmem_fdbox_kho_recover(fdt, storage);
+	if (memfd)
+		memfd_setup_file(memfd, 0);
+
+	return memfd;
+}
+
+/*
+ * Serialize a memfd into the FDT being built for FDBox KHO.
+ *
+ * @box_fd: FDBox entry whose ->file is written out.
+ * @fdt:    FDT under construction.
+ *
+ * Writes the memfd "compatible" property, opens an unnamed subnode, lets
+ * shmem_fdbox_kho_write() fill it in, and closes the subnode again.
+ * Files that are not shmem files are rejected.
+ *
+ * Returns 0 on success, -EINVAL if a libfdt call fails, -EOPNOTSUPP for
+ * non-shmem files, or the error returned by shmem_fdbox_kho_write().
+ */
+static int memfd_fdbox_kho_write(struct fdbox_fd *box_fd, void *fdt)
+{
+	int ret;
+
+	/*
+	 * libfdt reports failures as -FDT_ERR_* values, which are not errnos
+	 * and must not be OR-ed together or handed back to the caller as-is.
+	 * Check each call on its own and translate failures to -EINVAL.
+	 */
+	ret = fdt_property(fdt, "compatible", memfd_fdbox_compatible,
+			   sizeof(memfd_fdbox_compatible));
+
+	/* TODO: Track seals on the file as well. */
+
+	if (!ret)
+		ret = fdt_begin_node(fdt, "");
+	if (ret) {
+		pr_err("kho: failed to set up memfd node\n");
+		return -EINVAL;
+	}
+
+	/* TODO: HugeTLB support. */
+	if (!shmem_file(box_fd->file))
+		return -EOPNOTSUPP;
+
+	ret = shmem_fdbox_kho_write(box_fd, fdt);
+	if (ret)
+		return ret;
+
+	if (fdt_end_node(fdt)) {
+		pr_err("kho: failed to end memfd node!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct fdbox_file_ops memfd_fdbox_fops = {
+	.kho_write = memfd_fdbox_kho_write,
+};
+
+/*
+ * Register memfd_fdbox_kho_recover() as the FDBox handler for
+ * memfd_fdbox_compatible.
+ *
+ * A registration failure is only logged; the initcall always returns 0.
+ */
+static int __init memfd_fdbox_init(void)
+{
+	int ret = fdbox_register_handler(memfd_fdbox_compatible,
+					 memfd_fdbox_kho_recover);
+
+	if (ret)
+		pr_err("Could not register fdbox handler: %d\n", ret);
+
+	return 0;
+}
+late_initcall(memfd_fdbox_init);
+#endif /* CONFIG_FDBOX && CONFIG_KEXEC_HANDOVER */
-- 
2.47.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ