When doing checkpoint-restore in userspace one needs to determine whether various kernel objects (like mm_struct-s or files_struct-s) are shared between tasks, and to restore this state. The 2nd step can be solved by using the appropriate CLONE_ flags and the unshare syscall, while there is currently no way to solve the 1st one. One way of checking whether two tasks share e.g. an mm_struct is to expose some mm_struct ID of a task in its proc file, but showing such info is considered to be not that good for security reasons. Thus after some debate we came to the conclusion that a so-called 'comparison' syscall might be the best candidate. So here it is -- __NR_kcmp. It takes up to 5 arguments - the pids of the two tasks (whose characteristics should be compared), the comparison type and (in case of comparison of files) two file descriptors. At the moment only x86 is supported. Signed-off-by: Cyrill Gorcunov CC: "Eric W. Biederman" CC: Pavel Emelyanov CC: Andrey Vagin CC: KOSAKI Motohiro CC: Ingo Molnar CC: H. 
Peter Anvin CC: Thomas Gleixner CC: Glauber Costa CC: Andi Kleen CC: Tejun Heo CC: Matt Helsley CC: Pekka Enberg CC: Eric Dumazet CC: Vasiliy Kulikov CC: Andrew Morton CC: Alexey Dobriyan CC: Valdis.Kletnieks@vt.edu --- arch/x86/include/asm/kcmp.h | 17 ++++ arch/x86/include/asm/syscalls.h | 4 arch/x86/kernel/Makefile | 1 arch/x86/kernel/kcmp.c | 163 +++++++++++++++++++++++++++++++++++++++ arch/x86/syscalls/syscall_32.tbl | 1 arch/x86/syscalls/syscall_64.tbl | 1 6 files changed, 187 insertions(+) Index: linux-2.6.git/arch/x86/include/asm/kcmp.h =================================================================== --- /dev/null +++ linux-2.6.git/arch/x86/include/asm/kcmp.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_KCMP_H +#define _LINUX_KCMP_H + +/* Comparison types for the kcmp syscall "type" argument */ +enum { + KCMP_FILE, + KCMP_VM, + KCMP_FILES, + KCMP_FS, + KCMP_SIGHAND, + KCMP_IO, + KCMP_SYSVSEM, + + KCMP_TYPES, +}; + +#endif /* _LINUX_KCMP_H */ Index: linux-2.6.git/arch/x86/include/asm/syscalls.h =================================================================== --- linux-2.6.git.orig/arch/x86/include/asm/syscalls.h +++ linux-2.6.git/arch/x86/include/asm/syscalls.h @@ -42,6 +42,10 @@ long sys_sigaltstack(const stack_t __use asmlinkage int sys_set_thread_area(struct user_desc __user *); asmlinkage int sys_get_thread_area(struct user_desc __user *); +/* kernel/kcmp.c */ +asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, + unsigned long idx1, unsigned long idx2); + /* X86_32 only */ #ifdef CONFIG_X86_32 Index: linux-2.6.git/arch/x86/kernel/Makefile =================================================================== --- linux-2.6.git.orig/arch/x86/kernel/Makefile +++ linux-2.6.git/arch/x86/kernel/Makefile @@ -34,6 +34,7 @@ obj-y += alternative.o i8253.o pci-nom obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += kcmp.o obj-y += trampoline.o trampoline_$(BITS).o obj-y += process.o Index: linux-2.6.git/arch/x86/kernel/kcmp.c 
===================================================================
--- /dev/null
+++ linux-2.6.git/arch/x86/kernel/kcmp.c
@@ -0,0 +1,163 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v + cookies[type][0]) ^ cookies[type][1];
+}
+
+/*
+ * Order two pointers of the same @type by their obfuscated values, comparing
+ * directly since the sign of a difference is wrong once it overflows:
+ *   0 - equal
+ *   1 - less than
+ *   2 - greater than
+ */
+static int kcmp_ptr(long v1, long v2, int type)
+{
+	long t1 = kptr_obfuscate(v1, type);
+	long t2 = kptr_obfuscate(v2, type);
+
+	return (t1 < t2) | ((t1 > t2) << 1);
+}
+
+#define KCMP_TASK_PTR(task1, task2, member, type)	\
+	kcmp_ptr((long)(task1)->member,			\
+		 (long)(task2)->member,			\
+		 type)
+
+#define KCMP_PTR(ptr1, ptr2, type)			\
+	kcmp_ptr((long)(ptr1), (long)(ptr2), type)
+
+/*
+ * Return the struct file installed at descriptor @idx of @task, or NULL.
+ * No reference is taken: the result may only be compared, never dereferenced.
+ * task_lock() keeps task->files from being detached under us (it is NULL for
+ * an exiting task); the caller must hold a reference to @task.
+ */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+/*
+ * kcmp - tell whether two tasks share a kernel object.
+ * @pid1, @pid2: tasks to compare; @type: a KCMP_* constant;
+ * @idx1, @idx2: file descriptors, used by KCMP_FILE only.
+ *
+ * Returns the kcmp_ptr() code (0, 1 or 2), -ESRCH if a task is not found,
+ * -EACCES if the caller may not ptrace-read both tasks, -ENOENT for a bad
+ * descriptor or an unsupported KCMP_SYSVSEM, -EINVAL for an unknown @type.
+ */
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1;
+	struct task_struct *task2;
+	int ret = 0;
+
+	rcu_read_lock();
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2) {
+		/* no references taken yet, so there is nothing to put */
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+	get_task_struct(task1);
+	get_task_struct(task2);
+	rcu_read_unlock();
+
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EACCES;
+		goto err;
+	}
+
+	/* Only KCMP_FILE takes locks; the rest just read task pointers */
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		if (filp1 && filp2)
+			ret = KCMP_PTR(filp1, filp2, KCMP_FILE);
+		else
+			ret = -ENOENT;
+		break;
+	}
+	case KCMP_VM:
+		ret = KCMP_TASK_PTR(task1, task2, mm, KCMP_VM);
+		break;
+	case KCMP_FILES:
+		ret = KCMP_TASK_PTR(task1, task2, files, KCMP_FILES);
+		break;
+	case KCMP_FS:
+		ret = KCMP_TASK_PTR(task1, task2, fs, KCMP_FS);
+		break;
+	case KCMP_SIGHAND:
+		ret = KCMP_TASK_PTR(task1, task2, sighand, KCMP_SIGHAND);
+		break;
+	case KCMP_IO:
+		ret = KCMP_TASK_PTR(task1, task2, io_context, KCMP_IO);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = KCMP_TASK_PTR(task1, task2, sysvsem.undo_list, KCMP_SYSVSEM);
+#else
+		ret = -ENOENT;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err:
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+}
+
+/* Seed the cookies; the top bit and bit 0 of every cookie are forced on */
+static __init int kcmp_cookie_init(void)
+{
+	int i, j;
+
+	for (i = 0; i < KCMP_TYPES; i++) {
+		for (j = 0; j < 2; j++) {
+			get_random_bytes(&cookies[i][j],
+					 sizeof(cookies[i][j]));
+			cookies[i][j] |= (~(~0UL >> 1) | 1);
+		}
+	}
+
+	return 0;
+}
+late_initcall(kcmp_cookie_init);
Index: linux-2.6.git/arch/x86/syscalls/syscall_32.tbl
===================================================================
--- linux-2.6.git.orig/arch/x86/syscalls/syscall_32.tbl
+++ linux-2.6.git/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,4 @@
 346 i386 setns sys_setns
 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 kcmp sys_kcmp
Index: linux-2.6.git/arch/x86/syscalls/syscall_64.tbl
===================================================================
--- linux-2.6.git.orig/arch/x86/syscalls/syscall_64.tbl
+++ linux-2.6.git/arch/x86/syscalls/syscall_64.tbl
@@ -318,3 +318,4 @@
 309 64
getcpu sys_getcpu 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev +312 64 kcmp sys_kcmp -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/