lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 03 Dec 2008 14:04:29 -0500
From:	Lee Schermerhorn <Lee.Schermerhorn@...com>
To:	linux-mm@...ck.org, linux-kernel <linux-kernel@...r.kernel.org>
Cc:	Andrew Morton <akpm@...ux-foundation.org>, riel@...hat.com,
	hugh@...itas.com, kosaki.motohiro@...fujitsu.com
Subject: [PATCH] - support inheritance of mlocks across fork/exec V2



Against:  2.6.28-rc7-mmotm-081203

V02:	rework vetting of flags argument as suggested by
	Kosaki Motohiro.
	enhance description as requested by Andrew Morton.

Add support for mlockall(MCL_INHERIT|MCL_RECURSIVE):

	MCL_CURRENT[|MCL_FUTURE]|MCL_INHERIT - inherit memory locks
		[vmas' VM_LOCKED flags] across fork(), and inherit
		MCL_FUTURE behavior [mm's def_flags] across fork()
		and exec().  Behaves as if child and/or new task
		called mlockall(MCL_CURRENT|MCL_FUTURE) as first
		instruction.

	MCL_RECURSIVE - inherit MCL_CURRENT|MCL_FUTURE|MCL_INHERIT
		[vmas' VM_LOCKED flags for fork() and mm's def_flags
		and mcl_inherit across fork() and exec()] for all
		future generations of calling process's descendants.
		Behaves as if child and/or new task called
		mlockall(MCL_CURRENT|MCL_FUTURE|MCL_INHERIT|MCL_RECURSIVE)
		as the first instruction.

This is in support of a "lock prefix command"--e.g.,
mlock <cmd> <args> ...--analogous to taskset(1) for cpu affinity or
numactl(8) for numa memory policy.

Together with patches to keep mlocked pages off the LRU, this will
allow users/admins to lock down applications without modifying them,
if their RLIMIT_MEMLOCK is sufficiently large, keeping their pages
off the LRU and out of consideration for reclaim.

Potentially useful, as well, in real-time environments to force
prefaulting and residency for applications that don't mlock themselves.

Jeff Sharkey at Montana State developed a similar patch for Linux
[link no longer accessible], but apparently he never submitted the patch.

I submitted an earlier version of this patch around a year ago.  I
resurrected it to test the unevictable lru/mlocked pages patches--
e.g., by "mlock -r make -j<N*nr_cpus> all".  This did shake out a few
races and vmstat accounting bugs, but it is NOT something I'd recommend
as general practice--for kernel builds, that is.

----

Define MCL_INHERIT, MCL_RECURSIVE in <asm-*/mman.h>.
+ x86 and ia64 versions included.
+ other archs can/will be added, if this patch is deemed merge-worthy.

Similarly, I'll provide kernel man page update if/when needed.

Example "lock prefix command" in Documentation/vm/mlock.c

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@...com>

 Documentation/vm/mlock.c     |  149 +++++++++++++++++++++++++++++++++++++++++++
 arch/ia64/include/asm/mman.h |    2 
 arch/x86/include/asm/mman.h  |    3 
 fs/binfmt_elf.c              |    9 ++
 include/linux/mm_types.h     |    2 
 kernel/fork.c                |   15 +++-
 mm/mlock.c                   |   19 ++++-
 7 files changed, 191 insertions(+), 8 deletions(-)

Index: linux-2.6.28-rc7-mmotm-081203/arch/ia64/include/asm/mman.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/arch/ia64/include/asm/mman.h	2008-12-03 09:33:42.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/arch/ia64/include/asm/mman.h	2008-12-03 10:33:29.000000000 -0500
@@ -21,6 +21,8 @@
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_INHERIT	4		/* inherit '_FUTURE across fork/exec */
+#define MCL_RECURSIVE	8		/* inherit '_FUTURE recursively */
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
Index: linux-2.6.28-rc7-mmotm-081203/mm/mlock.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/mm/mlock.c	2008-12-03 10:33:11.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/mm/mlock.c	2008-12-03 10:33:29.000000000 -0500
@@ -573,15 +573,18 @@ asmlinkage long sys_munlock(unsigned lon
 static int do_mlockall(int flags)
 {
 	struct vm_area_struct * vma, * prev = NULL;
+	struct mm_struct *mm = current->mm;
 	unsigned int def_flags = 0;
 
 	if (flags & MCL_FUTURE)
-		def_flags = VM_LOCKED;
-	current->mm->def_flags = def_flags;
-	if (flags == MCL_FUTURE)
+		def_flags = VM_LOCKED;;
+	mm->def_flags = def_flags;
+	if (flags & MCL_INHERIT)
+		mm->mcl_inherit = flags & (MCL_INHERIT | MCL_RECURSIVE);
+	if ((flags & ~(MCL_INHERIT | MCL_RECURSIVE)) == MCL_FUTURE)
 		goto out;
 
-	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
+	for (vma = mm->mmap; vma ; vma = prev->vm_next) {
 		unsigned int newflags;
 
 		newflags = vma->vm_flags | VM_LOCKED;
@@ -600,9 +603,15 @@ asmlinkage long sys_mlockall(int flags)
 	unsigned long lock_limit;
 	int ret = -EINVAL;
 
-	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
+	if (!(flags & (MCL_CURRENT | MCL_FUTURE)))
 		goto out;
 
+	if (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_INHERIT | MCL_RECURSIVE))
+		goto out;	/* undefined flag bits */
+
+	if ((flags & (MCL_INHERIT | MCL_RECURSIVE)) == MCL_RECURSIVE)
+		goto out;	/* 'RECURSIVE undefined without 'INHERIT */
+
 	ret = -EPERM;
 	if (!can_do_mlock())
 		goto out;
Index: linux-2.6.28-rc7-mmotm-081203/kernel/fork.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/kernel/fork.c	2008-12-03 10:18:15.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/kernel/fork.c	2008-12-03 10:33:29.000000000 -0500
@@ -278,7 +278,8 @@ static int dup_mmap(struct mm_struct *mm
 	 */
 	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-	mm->locked_vm = 0;
+	if (!mm->mcl_inherit)
+		mm->locked_vm = 0;
 	mm->mmap = NULL;
 	mm->mmap_cache = NULL;
 	mm->free_area_cache = oldmm->mmap_base;
@@ -316,7 +317,8 @@ static int dup_mmap(struct mm_struct *mm
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
-		tmp->vm_flags &= ~VM_LOCKED;
+		if (!mm->mcl_inherit)
+			tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
 		anon_vma_link(tmp);
@@ -406,6 +408,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLO
 
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
+	unsigned long def_flags = 0;
+
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
@@ -422,9 +426,14 @@ static struct mm_struct * mm_init(struct
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 	mm_init_owner(mm, p);
+	if (current->mm && current->mm->mcl_inherit) {
+		def_flags = current->mm->def_flags & VM_LOCKED;
+		if (mm->mcl_inherit & MCL_RECURSIVE)
+			mm->mcl_inherit  = current->mm->mcl_inherit;
+	}
 
 	if (likely(!mm_alloc_pgd(mm))) {
-		mm->def_flags = 0;
+		mm->def_flags = def_flags;
 		mmu_notifier_mm_init(mm);
 		return mm;
 	}
Index: linux-2.6.28-rc7-mmotm-081203/fs/binfmt_elf.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/fs/binfmt_elf.c	2008-12-03 10:19:21.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/fs/binfmt_elf.c	2008-12-03 10:33:29.000000000 -0500
@@ -585,6 +585,7 @@ static int load_elf_binary(struct linux_
 	unsigned long reloc_func_desc = 0;
 	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
+	int mcl_inherit = 0;
 	struct {
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
@@ -749,6 +750,13 @@ static int load_elf_binary(struct linux_
 		SET_PERSONALITY(loc->elf_ex);
 	}
 
+	/* Optionally inherit MCL_FUTURE state before destroying old mm */
+	if (current->mm && current->mm->mcl_inherit) {
+		def_flags = current->mm->def_flags & VM_LOCKED;
+		if (current->mm->mcl_inherit & MCL_RECURSIVE)
+			mcl_inherit  = current->mm->mcl_inherit;
+	}
+
 	/* Flush all traces of the currently running executable */
 	retval = flush_old_exec(bprm);
 	if (retval)
@@ -757,6 +765,7 @@ static int load_elf_binary(struct linux_
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
+	current->mm->mcl_inherit = mcl_inherit;
 
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality.  */
Index: linux-2.6.28-rc7-mmotm-081203/arch/x86/include/asm/mman.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/arch/x86/include/asm/mman.h	2008-12-03 10:16:26.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/arch/x86/include/asm/mman.h	2008-12-03 10:33:29.000000000 -0500
@@ -16,5 +16,8 @@
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_INHERIT	4		/* inherit mlocks across fork */
+					/* inherit '_FUTURE flag across fork/exec */
+#define MCL_RECURSIVE	8		/* inherit mlocks recursively */
 
 #endif /* _ASM_X86_MMAN_H */
Index: linux-2.6.28-rc7-mmotm-081203/include/linux/mm_types.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/include/linux/mm_types.h	2008-12-03 10:18:01.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/include/linux/mm_types.h	2008-12-03 10:33:29.000000000 -0500
@@ -235,6 +235,8 @@ struct mm_struct {
 	unsigned int token_priority;
 	unsigned int last_interval;
 
+	int mcl_inherit;		/* inherit current/future locks */
+
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
Index: linux-2.6.28-rc7-mmotm-081203/Documentation/vm/mlock.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.28-rc7-mmotm-081203/Documentation/vm/mlock.c	2008-12-03 10:33:29.000000000 -0500
@@ -0,0 +1,149 @@
+/*
+ * mlock.c
+ *
+ * Command-line utility for launching a program with the
+ * mlockall() MCL_FUTURE flag set such that all of the task's
+ * pages will be locked into memory.  This depends on the
+ * MCL_INHERIT|MCL_RECURSIVE enhancement to mlockall(2).
+ *
+ * Based on the taskset command from the schedutils package by
+ *
+ * 	Robert Love <rml@...h9.net>
+ *
+ * Compile with:
+ *
+ * 	gcc -o mlock mlock.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, v2, as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2004 Robert Love
+ * Copyright (C) 2008 Hewlett-Packard, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MLOCK_VERSION "0.2"
+
+/*
+ * Version Info
+ *
+ * 0.1	- initial implementation
+ *
+ * 0.2  - add "--recursive" support
+ */
+
+#define OPTIONS "+hr"
+static struct option l_opts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.flag = NULL,
+		.val = 'h'
+	},
+	{
+		.name = "recursive",
+		.has_arg = no_argument,
+		.flag = NULL,
+		.val = 'r'
+	},
+	{
+		.name = NULL,
+	}
+};
+
+/*
+ * For testing before MCL_INHERIT and MCL_RECURSIVE exist in a
+ * user space header.  mlockall() will fail if these flags are
+ * not implemented.
+ *
+ * N.B., won't work on platforms with "interesting" values for
+ *       MCL_FUTURE  -- e.g., powerpc, sparc, alpha
+ *       [maybe OK for alpha, but ...]
+ */
+#ifndef MCL_INHERIT
+#define MCL_INHERIT   (MCL_FUTURE << 1)
+#define MCL_RECURSIVE (MCL_INHERIT << 1)
+#endif
+
+static const char *usage = "\
+\nmlock version " MLOCK_VERSION "\n\n\
+Usage:  %s [-hr] <cmd> [args...]]\n\n\
+Where:\n\
+\t--help/-h      = show this help/usage\n\
+\t--recursive/-r = inherit recursively--i.e., across future\n\
+\t                 generations.\n\n\
+Run <cmd> as if it had called mlockall(2) with the MCL_CURRENT|MCL_FUTURE\n\
+flags set.  That is, all of <cmd>'s pages will be locked into memory.\n\
+If '--recursive/-r' specified, the MCL_RECURSIVE flag will be added, and\n\
+all future descendants of <cmd> will run with inherit this condition,\n\
+unless one of them calls munlockall(2) or mlockall(2) without the\n\
+MCL_INHERIT|MCL_RECURSIVE flags.\n\n\
+";
+
+static void show_usage(const char *cmd)
+{
+	fprintf(stderr, usage, cmd);
+}
+
+int main(int argc, char *argv[])
+{
+
+	int opt;
+	int flags = MCL_FUTURE|MCL_INHERIT;
+
+	while ((opt = getopt_long(argc, argv, OPTIONS, l_opts, NULL)) != -1) {
+		int ret = 1;
+
+		switch (opt) {
+		case 'r':
+			flags |= MCL_RECURSIVE;
+			break;
+		case 'h':
+			ret = 0;
+			/* fall through */
+
+		default:
+			show_usage(argv[0]);
+			return ret;
+		}
+	}
+
+	if ((argc - optind) < 1) {
+		show_usage(argv[0]);
+		return 1;
+	}
+
+	if (mlockall(flags) == -1) {
+		fprintf(stderr, "%s mlockall() failed - %s\n", argv[0],
+			strerror(errno));
+		return 1;
+	}
+
+	argv += optind;
+	execvp(argv[0], argv);
+	perror("execvp");
+	fprintf(stderr, "failed to execute %s\n", argv[0]);
+	return 1;
+
+}
+


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ