[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1228331069.6693.73.camel@lts-notebook>
Date: Wed, 03 Dec 2008 14:04:29 -0500
From: Lee Schermerhorn <Lee.Schermerhorn@...com>
To: linux-mm@...ck.org, linux-kernel <linux-kernel@...r.kernel.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>, riel@...hat.com,
hugh@...itas.com, kosaki.motohiro@...fujitsu.com
Subject: [PATCH] - support inheritance of mlocks across fork/exec V2
Against: 2.6.28-rc7-mmotm-081203
V02: rework vetting of flags argument as suggested by
Kosaki Motohiro.
enhance description as requested by Andrew Morton.
Add support for mlockall(MCL_INHERIT|MCL_RECURSIVE):
MCL_CURRENT[|MCL_FUTURE]|MCL_INHERIT - inherit memory locks
[vmas' VM_LOCKED flags] across fork(), and inherit
MCL_FUTURE behavior [mm's def_flags] across fork()
and exec(). Behaves as if child and/or new task
called mlockall(MCL_CURRENT|MCL_FUTURE) as first
instruction.
MCL_RECURSIVE - inherit MCL_CURRENT|MCL_FUTURE|MCL_INHERIT
[vmas' VM_LOCKED flags for fork() and mm's def_flags
and mcl_inherit across fork() and exec()] for all
future generations of calling process's descendants.
Behaves as if child and/or new task called
mlockall(MCL_CURRENT|MCL_FUTURE|MCL_INHERIT|MCL_RECURSIVE)
as the first instruction.
In support of a "lock prefix command"--e.g., mlock <cmd> <args> ...
Analogous to taskset(1) for cpu affinity or numactl(8) for numa memory
policy.
Together with patches to keep mlocked pages off the LRU, this will
allow users/admins to lock down applications without modifying them,
if their RLIMIT_MEMLOCK is sufficiently large, keeping their pages
off the LRU and out of consideration for reclaim.
Potentially useful, as well, in real-time environments to force
prefaulting and residency for applications that don't mlock themselves.
Jeff Sharkey at Montana State developed a similar patch for Linux
[link no longer accessible], but apparently he never submitted the patch.
I submitted an earlier version of this patch around a year ago. I
resurrected it to test the unevictable lru/mlocked pages patches--
e.g., by "mlock -r make -j<N*nr_cpus> all". This did shake out a few
races and vmstat accounting bugs, but it is NOT something I'd recommend as
general practice--for kernel builds, that is.
----
Define MCL_INHERIT, MCL_RECURSIVE in <asm-*/mman.h>.
+ x86 and ia64 versions included.
+ versions for other archs can/will be created if this patch is deemed merge-worthy.
Similarly, I'll provide kernel man page update if/when needed.
Example "lock prefix command" in Documentation/vm/mlock.c
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@...com>
Documentation/vm/mlock.c | 149 +++++++++++++++++++++++++++++++++++++++++++
arch/ia64/include/asm/mman.h | 2
arch/x86/include/asm/mman.h | 3
fs/binfmt_elf.c | 9 ++
include/linux/mm_types.h | 2
kernel/fork.c | 15 +++-
mm/mlock.c | 19 ++++-
7 files changed, 191 insertions(+), 8 deletions(-)
Index: linux-2.6.28-rc7-mmotm-081203/arch/ia64/include/asm/mman.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/arch/ia64/include/asm/mman.h 2008-12-03 09:33:42.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/arch/ia64/include/asm/mman.h 2008-12-03 10:33:29.000000000 -0500
@@ -21,6 +21,8 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
+#define MCL_INHERIT 4 /* inherit '_FUTURE across fork/exec */
+#define MCL_RECURSIVE 8 /* inherit '_FUTURE recursively */
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
Index: linux-2.6.28-rc7-mmotm-081203/mm/mlock.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/mm/mlock.c 2008-12-03 10:33:11.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/mm/mlock.c 2008-12-03 10:33:29.000000000 -0500
@@ -573,15 +573,18 @@ asmlinkage long sys_munlock(unsigned lon
static int do_mlockall(int flags)
{
struct vm_area_struct * vma, * prev = NULL;
+ struct mm_struct *mm = current->mm;
unsigned int def_flags = 0;
if (flags & MCL_FUTURE)
- def_flags = VM_LOCKED;
- current->mm->def_flags = def_flags;
- if (flags == MCL_FUTURE)
+ def_flags = VM_LOCKED;;
+ mm->def_flags = def_flags;
+ if (flags & MCL_INHERIT)
+ mm->mcl_inherit = flags & (MCL_INHERIT | MCL_RECURSIVE);
+ if ((flags & ~(MCL_INHERIT | MCL_RECURSIVE)) == MCL_FUTURE)
goto out;
- for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
+ for (vma = mm->mmap; vma ; vma = prev->vm_next) {
unsigned int newflags;
newflags = vma->vm_flags | VM_LOCKED;
@@ -600,9 +603,15 @@ asmlinkage long sys_mlockall(int flags)
unsigned long lock_limit;
int ret = -EINVAL;
- if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
+ if (!(flags & (MCL_CURRENT | MCL_FUTURE)))
goto out;
+ if (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_INHERIT | MCL_RECURSIVE))
+ goto out; /* undefined flag bits */
+
+ if ((flags & (MCL_INHERIT | MCL_RECURSIVE)) == MCL_RECURSIVE)
+ goto out; /* 'RECURSIVE undefined without 'INHERIT */
+
ret = -EPERM;
if (!can_do_mlock())
goto out;
Index: linux-2.6.28-rc7-mmotm-081203/kernel/fork.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/kernel/fork.c 2008-12-03 10:18:15.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/kernel/fork.c 2008-12-03 10:33:29.000000000 -0500
@@ -278,7 +278,8 @@ static int dup_mmap(struct mm_struct *mm
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
- mm->locked_vm = 0;
+ if (!mm->mcl_inherit)
+ mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
@@ -316,7 +317,8 @@ static int dup_mmap(struct mm_struct *mm
if (IS_ERR(pol))
goto fail_nomem_policy;
vma_set_policy(tmp, pol);
- tmp->vm_flags &= ~VM_LOCKED;
+ if (!mm->mcl_inherit)
+ tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
tmp->vm_next = NULL;
anon_vma_link(tmp);
@@ -406,6 +408,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLO
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
+ unsigned long def_flags = 0;
+
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
@@ -422,9 +426,14 @@ static struct mm_struct * mm_init(struct
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
mm_init_owner(mm, p);
+ if (current->mm && current->mm->mcl_inherit) {
+ def_flags = current->mm->def_flags & VM_LOCKED;
+ if (mm->mcl_inherit & MCL_RECURSIVE)
+ mm->mcl_inherit = current->mm->mcl_inherit;
+ }
if (likely(!mm_alloc_pgd(mm))) {
- mm->def_flags = 0;
+ mm->def_flags = def_flags;
mmu_notifier_mm_init(mm);
return mm;
}
Index: linux-2.6.28-rc7-mmotm-081203/fs/binfmt_elf.c
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/fs/binfmt_elf.c 2008-12-03 10:19:21.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/fs/binfmt_elf.c 2008-12-03 10:33:29.000000000 -0500
@@ -585,6 +585,7 @@ static int load_elf_binary(struct linux_
unsigned long reloc_func_desc = 0;
int executable_stack = EXSTACK_DEFAULT;
unsigned long def_flags = 0;
+ int mcl_inherit = 0;
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
@@ -749,6 +750,13 @@ static int load_elf_binary(struct linux_
SET_PERSONALITY(loc->elf_ex);
}
+ /* Optionally inherit MCL_FUTURE state before destroying old mm */
+ if (current->mm && current->mm->mcl_inherit) {
+ def_flags = current->mm->def_flags & VM_LOCKED;
+ if (current->mm->mcl_inherit & MCL_RECURSIVE)
+ mcl_inherit = current->mm->mcl_inherit;
+ }
+
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
@@ -757,6 +765,7 @@ static int load_elf_binary(struct linux_
/* OK, This is the point of no return */
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
+ current->mm->mcl_inherit = mcl_inherit;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
Index: linux-2.6.28-rc7-mmotm-081203/arch/x86/include/asm/mman.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/arch/x86/include/asm/mman.h 2008-12-03 10:16:26.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/arch/x86/include/asm/mman.h 2008-12-03 10:33:29.000000000 -0500
@@ -16,5 +16,8 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
+#define MCL_INHERIT 4 /* inherit mlocks across fork */
+ /* inherit '_FUTURE flag across fork/exec */
+#define MCL_RECURSIVE 8 /* inherit mlocks recursively */
#endif /* _ASM_X86_MMAN_H */
Index: linux-2.6.28-rc7-mmotm-081203/include/linux/mm_types.h
===================================================================
--- linux-2.6.28-rc7-mmotm-081203.orig/include/linux/mm_types.h 2008-12-03 10:18:01.000000000 -0500
+++ linux-2.6.28-rc7-mmotm-081203/include/linux/mm_types.h 2008-12-03 10:33:29.000000000 -0500
@@ -235,6 +235,8 @@ struct mm_struct {
unsigned int token_priority;
unsigned int last_interval;
+ int mcl_inherit; /* inherit current/future locks */
+
unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */
Index: linux-2.6.28-rc7-mmotm-081203/Documentation/vm/mlock.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.28-rc7-mmotm-081203/Documentation/vm/mlock.c 2008-12-03 10:33:29.000000000 -0500
@@ -0,0 +1,149 @@
+/*
+ * mlock.c
+ *
+ * Command-line utility for launching a program with the
+ * mlockall() MCL_FUTURE flag set such that all of the task's
+ * pages will be locked into memory. This depends on the
+ * MCL_INHERIT|MCL_RECURSIVE enhancement to mlockall(2).
+ *
+ * Based on the taskset command from the schedutils package by
+ *
+ * Robert Love <rml@...h9.net>
+ *
+ * Compile with:
+ *
+ * gcc -o mlock mlock.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, v2, as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2004 Robert Love
+ * Copyright (C) 2008 Hewlett-Packard, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MLOCK_VERSION "0.2"
+
+/*
+ * Version Info
+ *
+ * 0.1 - initial implementation
+ *
+ * 0.2 - add "--recursive" support
+ */
+
+#define OPTIONS "+hr"
+static struct option l_opts[] = {
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .flag = NULL,
+ .val = 'h'
+ },
+ {
+ .name = "recursive",
+ .has_arg = no_argument,
+ .flag = NULL,
+ .val = 'r'
+ },
+ {
+ .name = NULL,
+ }
+};
+
+/*
+ * For testing before MCL_INHERIT and MCL_RECURSIVE exist in a
+ * user space header. mlockall() will fail if these flags are
+ * not implemented.
+ *
+ * N.B., won't work on platforms with "interesting" values for
+ * MCL_FUTURE -- e.g., powerpc, sparc, alpha
+ * [maybe OK for alpha, but ...]
+ */
+#ifndef MCL_INHERIT
+#define MCL_INHERIT (MCL_FUTURE << 1)
+#define MCL_RECURSIVE (MCL_INHERIT << 1)
+#endif
+
+static const char *usage = "\
+\nmlock version " MLOCK_VERSION "\n\n\
+Usage: %s [-hr] <cmd> [args...]]\n\n\
+Where:\n\
+\t--help/-h = show this help/usage\n\
+\t--recursive/-r = inherit recursively--i.e., across future\n\
+\t generations.\n\n\
+Run <cmd> as if it had called mlockall(2) with the MCL_CURRENT|MCL_FUTURE\n\
+flags set. That is, all of <cmd>'s pages will be locked into memory.\n\
+If '--recursive/-r' specified, the MCL_RECURSIVE flag will be added, and\n\
+all future descendants of <cmd> will run with inherit this condition,\n\
+unless one of them calls munlockall(2) or mlockall(2) without the\n\
+MCL_INHERIT|MCL_RECURSIVE flags.\n\n\
+";
+
+static void show_usage(const char *cmd)
+{
+ fprintf(stderr, usage, cmd);
+}
+
+int main(int argc, char *argv[])
+{
+
+ int opt;
+ int flags = MCL_FUTURE|MCL_INHERIT;
+
+ while ((opt = getopt_long(argc, argv, OPTIONS, l_opts, NULL)) != -1) {
+ int ret = 1;
+
+ switch (opt) {
+ case 'r':
+ flags |= MCL_RECURSIVE;
+ break;
+ case 'h':
+ ret = 0;
+ /* fall through */
+
+ default:
+ show_usage(argv[0]);
+ return ret;
+ }
+ }
+
+ if ((argc - optind) < 1) {
+ show_usage(argv[0]);
+ return 1;
+ }
+
+ if (mlockall(flags) == -1) {
+ fprintf(stderr, "%s mlockall() failed - %s\n", argv[0],
+ strerror(errno));
+ return 1;
+ }
+
+ argv += optind;
+ execvp(argv[0], argv);
+ perror("execvp");
+ fprintf(stderr, "failed to execute %s\n", argv[0]);
+ return 1;
+
+}
+
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists