This patch adds the ability to run so-called "checkpoint" files by extending the ELF file format. The extension includes - a new ELF file type, ET_CKPT - three additional program header types: PT_CKPT_VMA, PT_CKPT_CORE and PT_CKPT_PAGES. PT_CKPT_VMA -- holds a 'vma_entry' structure, which describes a memory area the kernel should map. It may also contain a file descriptor, in which case the kernel maps the file provided. Usually such a file is opened by a user-space helper tool which prepares the 'vma_entry' structure for the kernel. PT_CKPT_CORE -- holds a 'core_entry' structure (registers, TLS, task-specific settings). The structure is defined as a 16K container, which should be enough for most cases; 8K of it is reserved for arch-specific settings. PT_CKPT_PAGES -- the set of all pages whose contents should be restored. Apart from the ELF extension, flush_old_exec() has been split into two functions -- the original flush_old_exec() and flush_exec_keep_thread(). The latter doesn't call de_thread(), which allows the thread relationships to be kept. Also, an arch_setup_additional_pages_at() helper has been added to set up the vDSO at a predefined address. At the moment only the pure x86-64 architecture is supported. Signed-off-by: Cyrill Gorcunov CC: Andrew Vagin CC: Pavel Emelyanov CC: James Bottomley CC: Glauber Costa CC: H. Peter Anvin CC: Ingo Molnar CC: Tejun Heo CC: Dave Hansen CC: Eric W. 
Biederman CC: Daniel Lezcano CC: Alexey Dobriyan --- arch/x86/include/asm/elf.h | 3 arch/x86/include/asm/elf_ckpt.h | 80 ++++++++ arch/x86/kernel/Makefile | 2 arch/x86/kernel/elf_ckpt.c | 161 ++++++++++++++++++ arch/x86/vdso/vma.c | 22 ++ fs/Kconfig.binfmt | 11 + fs/Makefile | 1 fs/binfmt_elf.c | 17 + fs/binfmt_elf_ckpt.c | 356 ++++++++++++++++++++++++++++++++++++++++ fs/exec.c | 27 +-- include/linux/binfmts.h | 1 include/linux/elf_ckpt.h | 103 +++++++++++ 12 files changed, 772 insertions(+), 12 deletions(-) Index: linux-2.6.git/arch/x86/include/asm/elf.h =================================================================== --- linux-2.6.git.orig/arch/x86/include/asm/elf.h +++ linux-2.6.git/arch/x86/include/asm/elf.h @@ -314,7 +314,8 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); - +extern int arch_setup_additional_pages_at(struct linux_binprm *bprm, + void *addr, int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages Index: linux-2.6.git/arch/x86/include/asm/elf_ckpt.h =================================================================== --- /dev/null +++ linux-2.6.git/arch/x86/include/asm/elf_ckpt.h @@ -0,0 +1,80 @@ +#ifndef _LINUX_ELF_X86_CHECKPOINT_H +#define _LINUX_ELF_X86_CHECKPOINT_H + +#include + +#include +#include + +#define CKPT_GDT_ENTRY_TLS_ENTRIES 3 + +struct user_regs_entry { + __u64 r15; + __u64 r14; + __u64 r13; + __u64 r12; + __u64 bp; + __u64 bx; + __u64 r11; + __u64 r10; + __u64 r9; + __u64 r8; + __u64 ax; + __u64 cx; + __u64 dx; + __u64 si; + __u64 di; + __u64 orig_ax; + __u64 ip; + __u64 cs; + __u64 flags; + __u64 sp; + __u64 ss; + __u64 fs_base; + __u64 gs_base; + __u64 ds; + __u64 es; + __u64 fs; + __u64 gs; +} __packed; + +struct desc_struct_entry { + __u32 a; + __u32 b; +} __packed; + +struct user_fpregs_entry { + __u16 cwd; + __u16 swd; + __u16 
twd; + __u16 fop; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; + __u32 xmm_space[64]; + __u32 padding[24]; +} __packed; + +struct ckpt_arch_entry { + struct user_regs_entry gpregs; + struct user_fpregs_entry fpregs; + struct desc_struct tls_array[CKPT_GDT_ENTRY_TLS_ENTRIES]; +}; + +struct core_entry; + +#ifdef CONFIG_X86_64 +extern int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs, + struct core_entry *core_entry); +#else +static inline int +load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs, + struct core_entry *core_entry) +{ + return -ENOEXEC; +} +#endif + +#endif /* _LINUX_ELF_X86_CHECKPOINT_H */ Index: linux-2.6.git/arch/x86/kernel/Makefile =================================================================== --- linux-2.6.git.orig/arch/x86/kernel/Makefile +++ linux-2.6.git/arch/x86/kernel/Makefile @@ -99,6 +99,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o +obj-$(CONFIG_BINFMT_ELF_CKPT) += elf_ckpt.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) Index: linux-2.6.git/arch/x86/kernel/elf_ckpt.c =================================================================== --- /dev/null +++ linux-2.6.git/arch/x86/kernel/elf_ckpt.c @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 + +#define cp_reg(d, s, r) d.r = s.r + +int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs, + struct core_entry *core_entry) +{ + struct ckpt_arch_entry *arch = (struct ckpt_arch_entry *)core_entry->arch; + struct thread_struct *thread = ¤t->thread; + + struct user_regs_struct gpregs; + struct 
user_i387_struct fpregs; + + mm_segment_t old_fs; + int i, ret; + + if (core_entry->header.arch != CKPT_HEADER_ARCH_X86_64) { + pr_err("elf-ckpt-x86: Unsupported or corrupted header\n"); + return -ENOEXEC; + } + + BUILD_BUG_ON(CKPT_GDT_ENTRY_TLS_ENTRIES != GDT_ENTRY_TLS_ENTRIES); + BUILD_BUG_ON(sizeof(struct ckpt_arch_entry) > CKPT_ARCH_SIZE); + + memset(&gpregs, 0, sizeof(gpregs)); + memset(&fpregs, 0, sizeof(fpregs)); + + /* + * General purpose registers + */ + cp_reg(gpregs, arch->gpregs, r15); + cp_reg(gpregs, arch->gpregs, r14); + cp_reg(gpregs, arch->gpregs, r13); + cp_reg(gpregs, arch->gpregs, r12); + cp_reg(gpregs, arch->gpregs, bp); + cp_reg(gpregs, arch->gpregs, bx); + cp_reg(gpregs, arch->gpregs, r11); + cp_reg(gpregs, arch->gpregs, r10); + cp_reg(gpregs, arch->gpregs, r9); + cp_reg(gpregs, arch->gpregs, r8); + cp_reg(gpregs, arch->gpregs, ax); + cp_reg(gpregs, arch->gpregs, cx); + cp_reg(gpregs, arch->gpregs, dx); + cp_reg(gpregs, arch->gpregs, si); + cp_reg(gpregs, arch->gpregs, di); + cp_reg(gpregs, arch->gpregs, orig_ax); + cp_reg(gpregs, arch->gpregs, ip); + cp_reg(gpregs, arch->gpregs, cs); + cp_reg(gpregs, arch->gpregs, flags); + cp_reg(gpregs, arch->gpregs, sp); + cp_reg(gpregs, arch->gpregs, ss); + cp_reg(gpregs, arch->gpregs, fs_base); + cp_reg(gpregs, arch->gpregs, gs_base); + cp_reg(gpregs, arch->gpregs, ds); + cp_reg(gpregs, arch->gpregs, es); + cp_reg(gpregs, arch->gpregs, fs); + cp_reg(gpregs, arch->gpregs, gs); + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = arch_ptrace(current, PTRACE_SETREGS, 0, (unsigned long)&gpregs); + set_fs(old_fs); + if (ret) + goto out; + + *regs = *task_pt_regs(current); + + thread->usersp = arch->gpregs.sp; + thread->ds = arch->gpregs.ds; + thread->es = arch->gpregs.es; + thread->fs = arch->gpregs.fs; + thread->gs = arch->gpregs.gs; + + thread->fsindex = thread->fs; + thread->gsindex = thread->gs; + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) { + thread->tls_array[i].a = arch->tls_array[i].a; + 
thread->tls_array[i].b = arch->tls_array[i].b; + } + + if (arch->gpregs.fs_base) { + ret = do_arch_prctl(current, ARCH_SET_FS, arch->gpregs.fs_base); + if (ret) + goto out; + } + + if (arch->gpregs.gs_base) { + ret = do_arch_prctl(current, ARCH_SET_GS, arch->gpregs.gs_base); + if (ret) + goto out; + } + + /* Restoring FPU */ + if (core_entry->task_flags & PF_USED_MATH) { + + cp_reg(fpregs, arch->fpregs, cwd); + cp_reg(fpregs, arch->fpregs, swd); + cp_reg(fpregs, arch->fpregs, twd); + cp_reg(fpregs, arch->fpregs, fop); + cp_reg(fpregs, arch->fpregs, rip); + cp_reg(fpregs, arch->fpregs, rdp); + cp_reg(fpregs, arch->fpregs, mxcsr); + cp_reg(fpregs, arch->fpregs, mxcsr_mask); + + for (i = 0; i < ARRAY_SIZE(arch->fpregs.st_space); i++) + cp_reg(fpregs, arch->fpregs, st_space[i]); + + for (i = 0; i < ARRAY_SIZE(arch->fpregs.xmm_space); i++) + cp_reg(fpregs, arch->fpregs, xmm_space[i]); + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = arch_ptrace(current, PTRACE_SETFPREGS, 0, (unsigned long)&fpregs); + set_fs(old_fs); + if (ret) + goto out; + } + +out: + return ret; +} + +#endif /* CONFIG_X86_64 */ Index: linux-2.6.git/arch/x86/vdso/vma.c =================================================================== --- linux-2.6.git.orig/arch/x86/vdso/vma.c +++ linux-2.6.git/arch/x86/vdso/vma.c @@ -137,6 +137,28 @@ up_fail: return ret; } +int arch_setup_additional_pages_at(struct linux_binprm *bprm, void *addr, int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + current->mm->context.vdso = addr; + ret = install_special_mapping(mm, (unsigned long)addr, vdso_size, + VM_READ | VM_EXEC | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | + VM_ALWAYSDUMP, + vdso_pages); + if (ret) + current->mm->context.vdso = NULL; + + up_write(&mm->mmap_sem); + return ret; +} + static __init int vdso_setup(char *s) { vdso_enabled = simple_strtoul(s, NULL, 0); Index: linux-2.6.git/fs/Kconfig.binfmt 
=================================================================== --- linux-2.6.git.orig/fs/Kconfig.binfmt +++ linux-2.6.git/fs/Kconfig.binfmt @@ -23,6 +23,17 @@ config BINFMT_ELF ld.so (check the file for location and latest version). +config BINFMT_ELF_CKPT + tristate "Kernel support for CKPT ELF binaries" + default n + depends on BINFMT_ELF && X86_64 + help + ELF CKPT (checkpoint) is an extension to ELF format to restore + checkpointed processes. It's not confirmed yet and highly + experimental. + + If unsure, say N. + config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF Index: linux-2.6.git/fs/Makefile =================================================================== --- linux-2.6.git.orig/fs/Makefile +++ linux-2.6.git/fs/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc obj-y += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o +obj-$(CONFIG_BINFMT_ELF_CKPT) += binfmt_elf_ckpt.o obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o Index: linux-2.6.git/fs/binfmt_elf.c =================================================================== --- linux-2.6.git.orig/fs/binfmt_elf.c +++ linux-2.6.git/fs/binfmt_elf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -592,7 +593,11 @@ static int load_elf_binary(struct linux_ if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out; - if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) + if (loc->elf_ex.e_type != ET_EXEC && +#ifdef CONFIG_BINFMT_ELF_CKPT + loc->elf_ex.e_type != ET_CKPT && +#endif + loc->elf_ex.e_type != ET_DYN) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; @@ -619,6 +624,16 @@ static int load_elf_binary(struct linux_ goto out_free_ph; } +#ifdef CONFIG_BINFMT_ELF_CKPT + if (loc->elf_ex.e_type == ET_CKPT) { + retval = load_elf_ckpt(bprm, regs, &loc->elf_ex, + (struct elf_phdr *)elf_phdata); + if (!retval) + 
set_binfmt(&elf_format); + goto out_free_ph; + } +#endif + elf_ppnt = elf_phdata; elf_bss = 0; elf_brk = 0; Index: linux-2.6.git/fs/binfmt_elf_ckpt.c =================================================================== --- /dev/null +++ linux-2.6.git/fs/binfmt_elf_ckpt.c @@ -0,0 +1,356 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs, + struct elfhdr *elf_ex, struct elf_phdr *elf_phdr) +{ + struct elf_phdr *elf_phdr_pages; + struct flex_array *fa = NULL; + struct vma_entry *vma_entry_ptr; + int nr_vma_found, nr_vma_mapped; + struct vma_entry vma_entry; + struct file *file = NULL; + unsigned long map_addr; + +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + unsigned long vdso = -1UL; +#endif + + struct core_entry *core_entry = NULL; + unsigned long start_stack = -1UL; + + int i, ret = -ENOEXEC; + loff_t off; + + BUILD_BUG_ON(CKPT_TASK_COMM_LEN != TASK_COMM_LEN); + BUILD_BUG_ON(CKPT_PAGE_SIZE != PAGE_SIZE); + BUILD_BUG_ON(CKPT_CORE_SIZE != sizeof(*core_entry)); + + elf_phdr_pages = NULL; + nr_vma_found = 0; + nr_vma_mapped = 0; + + /* + * An early check for header version so if we fail here + * we would not need to use flex array at all. 
+ */ + for (i = 0; i < elf_ex->e_phnum; i++) { + if (elf_phdr[i].p_type != PT_CKPT_CORE) + continue; + + core_entry = vmalloc(sizeof(*core_entry)); + if (!core_entry) { + ret = -ENOMEM; + goto out; + } + + ret = kernel_read(bprm->file, elf_phdr[i].p_offset, + (char *)core_entry, sizeof(*core_entry)); + if (ret != sizeof(*core_entry)) { + pr_err("elf-ckpt: Can't read core_entry\n"); + ret = -EIO; + goto out; + } + + if (core_entry->header.version != CKPT_HEADER_VERSION) { + pr_err("elf-ckpt: Unsupported or corrupted header\n"); + ret = -ENOEXEC; + goto out; + } + + break; + } + + if (i == elf_ex->e_phnum) { + pr_err("elf-ckpt: No header found\n"); + ret = -ENOEXEC; + goto out; + } + + + fa = flex_array_alloc(sizeof(vma_entry), elf_ex->e_phnum, GFP_KERNEL); + if (!fa || flex_array_prealloc(fa, 0, elf_ex->e_phnum, GFP_KERNEL)) { + ret = -ENOMEM; + if (fa) { + flex_array_free(fa); + fa = NULL; + goto out; + } + } + + ret = flush_exec_keep_thread(bprm); + if (ret) + goto out; + + current->flags &= ~PF_FORKNOEXEC; + current->mm->def_flags = 0; + + /* + * We don't care about parameters passed (such as argc, argv, env) + * when execute checkpoint file because we're to substitute + * all things anyway. 
+ */ + do_munmap(current->mm, 0, TASK_SIZE); + + SET_PERSONALITY(loc->elf_ex); + + for (i = 0; i < elf_ex->e_phnum; i++) { + + switch (elf_phdr[i].p_type) { + case PT_CKPT_VMA: + ret = kernel_read(bprm->file, elf_phdr[i].p_offset, + (char *)&vma_entry, sizeof(vma_entry)); + if (ret != sizeof(vma_entry)) { + pr_err("elf-ckpt: Can't read vma_entry\n"); + ret = -EIO; + goto out; + } + if (flex_array_put(fa, i, &vma_entry, GFP_KERNEL)) + BUG(); + + /* We need to know if there is executable stack */ + if (vma_entry.status & VMA_AREA_STACK) { + if (vma_entry.flags & PROT_EXEC) + current->personality |= READ_IMPLIES_EXEC; + } + + nr_vma_found++; + continue; + case PT_CKPT_PAGES: + elf_phdr_pages = &elf_phdr[i]; + continue; + default: + continue; + } + } + + /* Be sure it has the file structure we expected to see. */ + if (!elf_phdr_pages || !nr_vma_found) { + ret = -ENOEXEC; + goto out; + } + + /* + * VMA randomization still needs to be set (just in case if + * the program we restore will exec() something else later). + */ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + current->flags |= PF_RANDOMIZE; + + /* + * FIXME: Note it flushes signal handlers as well, + * so we need to dump queued signals and restore + * them here. 
+ */ + setup_new_exec(bprm); + + current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; + + for (i = 0; i < nr_vma_found; i++) { + vma_entry_ptr = flex_array_get(fa, i); + +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + if (vma_entry_ptr->status & VMA_AREA_VDSO) + vdso = vma_entry_ptr->start; +#endif + + if (vma_entry_ptr->status & VMA_AREA_STACK) { + /* Note if stack is VM_GROWSUP -- it should be reversed */ + start_stack = vma_entry_ptr->start; + } + + /* Anything special should be ignored */ + if (!(vma_entry_ptr->status & VMA_AREA_REGULAR)) + continue; + + /* It's a file mmap'ed */ + if (vma_entry_ptr->fd != -1) { + file = fget((unsigned int)vma_entry_ptr->fd); + if (!file) { + ret = -EBADF; + goto out_unmap; + } + + /* Reuse this field to handle error cases */ + vma_entry_ptr->fd = (__u64)file; + } else + file = NULL; + + down_write(¤t->mm->mmap_sem); + map_addr = do_mmap(file, + vma_entry_ptr->start, + vma_entry_ptr->end - vma_entry_ptr->start, + vma_entry_ptr->prot, + vma_entry_ptr->flags | MAP_FIXED, + vma_entry_ptr->pgoff); + up_write(¤t->mm->mmap_sem); + + if (file) { + fput(file); + do_close((unsigned int)vma_entry_ptr->fd); + } + + if ((unsigned long)(map_addr) >= TASK_SIZE) { + ret = IS_ERR((void *)map_addr) ? 
PTR_ERR((void*)map_addr) : -EINVAL; + goto out_unmap; + } + + nr_vma_mapped++; + } + +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + if (vdso == -1UL) { + pr_err("elf-ckpt: Can't find VDSO address\n"); + ret = -ENOEXEC; + goto out_unmap; + } +#endif + + if (start_stack == -1UL) { + pr_err("elf-ckpt: Can't find stack VMA\n"); + ret = -ENOEXEC; + goto out_unmap; + } + + /* The name it has before */ + set_task_comm(current, core_entry->task_comm); + + bprm->p = core_entry->mm_start_stack; + + current->mm->start_code = core_entry->mm_start_code; + current->mm->end_code = core_entry->mm_end_code; + current->mm->start_data = core_entry->mm_start_data; + current->mm->end_data = core_entry->mm_end_data; + current->mm->start_stack = core_entry->mm_start_stack; + current->mm->start_brk = core_entry->mm_start_brk; + current->mm->brk = core_entry->mm_brk; + +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + ret = arch_setup_additional_pages_at(bprm, (void *)vdso, 0); + if (ret) { + pr_err("elf-ckpt: Can't setup additional pages at %lx with %d\n", + vdso, ret); + goto out_unmap; + } +#endif + + /* + * Restore pages + */ + off = elf_phdr_pages->p_offset; + while (1) { + struct vm_area_struct *vma; + struct page *page; + void *page_data; + __u64 va; + + ret = kernel_read(bprm->file, off, (char *)&va, sizeof(va)); + if (ret != sizeof(va)) { + pr_err("elf-ckpt: Can't read page virtual address: " + "ret = %d off = %lx\n", ret, (unsigned long)off); + ret = -EIO; + goto out_unmap; + } + + /* End of pages reached */ + if (!va) + break; + + vma = find_vma(current->mm, (unsigned long)va); + if (!vma) { + pr_err("elf-ckpt: No VMA for page: %16lx\n", (unsigned long)va); + ret = -ESRCH; + goto out_unmap; + } + + ret = get_user_pages(current, current->mm, (unsigned long)va, + 1, 1, 1, &page, NULL); + if (ret != 1) { + pr_err("elf-ckpt: Can't get user page: %16lx\n", (unsigned long)va); + ret = -EFAULT; + goto out_unmap; + } + + page_data = kmap(page); + ret = kernel_read(bprm->file, off + sizeof(va), 
page_data, PAGE_SIZE); + kunmap(page); + put_page(page); + + if (ret != PAGE_SIZE) { + pr_err("elf-ckpt: Can't read data on page: %16lx\n", (unsigned long)va); + ret = -EFAULT; + goto out_unmap; + } + + off += sizeof(va) + PAGE_SIZE; + } + + /* + * Architecture specific setup for registers + * and friends, it's done lately since if + * an error happened before there is no much + * point to setup arch-specific things at all. + */ + ret = load_elf_ckpt_arch(current, regs, core_entry); + if (ret) + goto out_unmap; + + /* We're done */ + ret = 0; +out: + if (core_entry) + vfree(core_entry); + + if (fa) + flex_array_free(fa); + return ret; + +out_unmap: + for (i = 0; i < nr_vma_mapped; i++) { + vma_entry_ptr = flex_array_get(fa, i); + down_write(¤t->mm->mmap_sem); + do_munmap(current->mm, vma_entry_ptr->start, + vma_entry_ptr->end - vma_entry_ptr->start); + up_write(¤t->mm->mmap_sem); + } + + send_sig(SIGKILL, current, 0); + goto out; +} Index: linux-2.6.git/fs/exec.c =================================================================== --- linux-2.6.git.orig/fs/exec.c +++ linux-2.6.git/fs/exec.c @@ -1071,18 +1071,10 @@ void set_task_comm(struct task_struct *t perf_event_comm(tsk); } -int flush_old_exec(struct linux_binprm * bprm) +int flush_exec_keep_thread(struct linux_binprm * bprm) { int retval; - /* - * Make sure we have a private signal table and that - * we are unassociated from the previous thread group. - */ - retval = de_thread(current); - if (retval) - goto out; - set_mm_exe_file(bprm->mm, bprm->file); /* @@ -1101,10 +1093,25 @@ int flush_old_exec(struct linux_binprm * current->personality &= ~bprm->per_clear; return 0; - out: return retval; } +EXPORT_SYMBOL(flush_exec_keep_thread); + +int flush_old_exec(struct linux_binprm * bprm) +{ + int retval; + + /* + * Make sure we have a private signal table and that + * we are unassociated from the previous thread group. 
+ */ + retval = de_thread(current); + if (retval) + return retval; + + return flush_exec_keep_thread(bprm); +} EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) Index: linux-2.6.git/include/linux/binfmts.h =================================================================== --- linux-2.6.git.orig/include/linux/binfmts.h +++ linux-2.6.git/include/linux/binfmts.h @@ -110,6 +110,7 @@ extern int prepare_binprm(struct linux_b extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *, struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern int flush_exec_keep_thread(struct linux_binprm * bprm); extern void setup_new_exec(struct linux_binprm * bprm); extern void would_dump(struct linux_binprm *, struct file *); Index: linux-2.6.git/include/linux/elf_ckpt.h =================================================================== --- /dev/null +++ linux-2.6.git/include/linux/elf_ckpt.h @@ -0,0 +1,103 @@ +#ifndef _LINUX_ELF_CHECKPOINT_H +#define _LINUX_ELF_CHECKPOINT_H + +#ifdef __KERNEL__ + +#include +#include + +#include +#include + +/* + * Elf extension includes new Elf file type + * and program header types as well. 
+ */ +#define ET_CKPT 5 + +#define PT_CKPT_OFFSET 0x01010101 + +#define PT_CKPT_VMA (PT_LOOS + PT_CKPT_OFFSET + 1) +#define PT_CKPT_CORE (PT_LOOS + PT_CKPT_OFFSET + 2) +#define PT_CKPT_PAGES (PT_LOOS + PT_CKPT_OFFSET + 3) + +#define CKPT_PAGE_SIZE 4096 +#define CKPT_TASK_COMM_LEN 16 + +#define CKPT_HEADER_VERSION 1 +#define CKPT_HEADER_ARCH_X86_64 1 + +#define VMA_AREA_REGULAR (1 << 0) +#define VMA_AREA_STACK (1 << 1) +#define VMA_AREA_VSYSCALL (1 << 2) +#define VMA_AREA_VDSO (1 << 3) +#define VMA_FORCE_READ (1 << 4) +#define VMA_AREA_HEAP (1 << 5) +#define VMA_FILE_PRIVATE (1 << 6) +#define VMA_FILE_SHARED (1 << 7) +#define VMA_ANON_SHARED (1 << 8) +#define VMA_ANON_PRIVATE (1 << 9) +#define VMA_FORCE_WRITE (1 << 10) + +struct vma_entry { + __u64 start; + __u64 end; + __u64 pgoff; + __u32 prot; + __u32 flags; + __u32 status; /* from VMA_x above */ + __u32 pid; /* pid VMA belongs to */ + __s64 fd; + __u64 ino; + __u32 dev_maj; + __u32 dev_min; +} __packed; + +struct page_entry { + __u64 va; /* page virtual address */ + __u8 data[CKPT_PAGE_SIZE]; /* page contents */ +} __packed; + +struct image_header { + __u16 version; + __u16 arch; + __u32 flags; +} __packed; + +#define CKPT_ARCH_SIZE (2 * 4096) +#define CKPT_CORE_SIZE (4 * 4096) + +struct core_entry { + union { + struct { + struct image_header header; + __u8 arch[CKPT_ARCH_SIZE]; /* should be enough for all archs */ + __u32 task_personality; + __u8 task_comm[CKPT_TASK_COMM_LEN]; + __u32 task_flags; + __u64 mm_start_code; + __u64 mm_end_code; + __u64 mm_start_data; + __u64 mm_end_data; + __u64 mm_start_stack; + __u64 mm_start_brk; + __u64 mm_brk; + }; + __u8 __core_pad[CKPT_CORE_SIZE]; + }; +} __packed; + +#ifdef CONFIG_BINFMT_ELF_CKPT +extern int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs, + struct elfhdr *elf_ex, struct elf_phdr *elf_phdr); +#else +static inline int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs, + struct elfhdr *elf_ex, struct elf_phdr *elf_phdr) +{ + return 
-ENOEXEC; +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_ELF_CHECKPOINT_H */ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/