lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 6 Jun 2007 13:37:28 +0200 (CEST)
From:	Mark Pflueger <pflueger@...ormatik.tu-muenchen.de>
To:	linux-kernel@...r.kernel.org
Subject: checkpointing and restoring processes

hi everyone!

i'm not subscribed to the list, so if you care to flame because of my noob 
question, just do it to the list, otherwise please cc me.

i'm trying to write a checkpoint/restore module for processes and so have 
a basic version going already - problem is, when i restore the process, 
one of three things happens at random. first is, the process restored 
segfaults. second is, i get a kernel null pointer dereference and third 
is, i get a virtual address lookup error and a kernel crash. the trace 
back and the address always change.

the user space process is as simple as i could make it: (error checking 
and debugging messages are left out)


void take_chkpt(void) {
        pid_t pid;
        char call_pid[10];
        char call_num[10];

        chkptpid = getpid();
        snprintf(call_pid, 9, "%d", chkptpid);
        snprintf(call_num, 9, "%d", checkpointnum);

	switch(pid = fork()) {
	case -1:
                fprintf(stderr, "Fork failed.\n");
                return;
                break;
	case  0:   /* child process */
                if(!execl("child_take", call_pid, call_num, (char *)0))
                        perror("execl: ");
                break;
	default:   /* parent process */
                waitpid(pid, NULL, 0);
                break;
	}

        return;
}


void restore_chkpts(void) {
        pid_t pid;
        char call_pid[10];
        char call_num[10];

	ENTERFUN();

        if(restore_retry) // do nothing on second call to restore
                return;

        chkptpid = getpid();
        snprintf(call_pid, 9, "%d", chkptpid);
        snprintf(call_num, 9, "%d", checkpointnum);

	switch(pid = fork()) {
	case -1:
                fprintf(stderr, "MP: Fork failed.\n");
                return;
                break;
	case  0:   /* child process */
                if(!execl("child_restore", call_pid, call_num, (char *)0))
                        perror("execl: ");
                break;
	default:   /* parent process */
                INF(("Parent Process"));
                restore_retry=1;
                INF(("Wait for Child..."));
                waitpid(pid, NULL, 0);
                break;
	}

	LEAVEFUN();

        return;
}

int main(int argc, char* argv[]) {
	take_chkpt();
	printf("Hello cruel world!\n");
	restore_chkpts();
	return 0;
}

where child_take and child_restore do the following:


void child_take_chkpt(int chkptpid, int checkpointnum) {
        struct chkpt_ioctl chkptio;
        int dev_fd; // ioctl device file
        char chkptname[30];

        if ((dev_fd = open(CHKPT_DEVICE, O_RDWR)) < 0) {
                perror("MP: Open device file");
                exit(EXIT_FAILURE);
        }
        chkptio.pid = chkptpid;
        snprintf(chkptname, 29, "/tmp/chkpt_%d_%d", chkptio.pid, checkpointnum);
        chkptio.file = creat(chkptname, 00755);
        sleep(1); // to go sure the parent process is in waitpid -- ugly, 
but works
        kill(chkptio.pid, SIGSTOP);
        sleep(1);
        ioctl(dev_fd, CHKPT_IOCTL_SAVE, (unsigned long)&chkptio);
        close(dev_fd);
        close(chkptio.file);
        kill(chkptio.pid, SIGCONT);
        exit(0);
}

void child_restore_chkpts(int chkptpid, int checkpointnum) {
        struct chkpt_ioctl chkptio;
        int dev_fd; // ioctl device file
        char chkptname[30];

        snprintf(chkptname, 29, "/tmp/chkpt_%d_%d", chkptpid, checkpointnum-1);
        chkptio.file = open(chkptname, O_RDONLY);
        chkptio.pid = chkptpid;
        dev_fd = open(CHKPT_DEVICE, O_RDWR);
        sleep(1);
        kill(chkptpid, SIGSTOP);
        sleep(1);
        ioctl(dev_fd, CHKPT_IOCTL_RESTORE, (unsigned long)&chkptio);
        close(chkptio.file);
        close(dev_fd);
        kill(chkptpid, SIGCONT);
        exit(0);
}

the header for the files is this:


enum {
        CHKPT_IOCTL_SAVE,
        CHKPT_IOCTL_RESTORE
};

struct chkpt_ioctl {
        pid_t pid; // for fork tests
        int file;
};

struct chkpt {
        pid_t pid; // for fork tests
        struct pt_regs regs;
        unsigned int datasize;
        unsigned int brksize;
        unsigned int stacksize;
};


and finally the kernel module:

int chkpt_ioctl_handler(struct inode *i, struct file *f,
                     unsigned int cmd, unsigned long arg) {
        struct chkpt_ioctl pmio, *u_pmio;
        int ret = -1;

        u_pmio = (struct chkpt_ioctl *)arg;

        switch(cmd) {
        case CHKPT_IOCTL_SAVE:
                if (copy_from_user(&pmio, u_pmio, sizeof(struct 
chkpt_ioctl))) {
                        printk("...failed to copy from user\n");
                        ret = -1;
                        break;
                }
                if(chkpt_save(&pmio) < 0) {
                        printk("...failed to save chkpt\n");
                        ret = -1;
                        break;
                }
                ret = 0;
                break;
        case CHKPT_IOCTL_RESTORE:
                INFO(("CHKPT_IOCTL_RESTORE"));
                if (copy_from_user(&pmio, u_pmio, sizeof(struct 
chkpt_ioctl))) {
                        printk("...failed to copy from user\n");
                        ret = -1;
                        break;
                }
                if (chkpt_restore(&pmio) < 0) {
                        printk("...failed to restore chkpt\n");
                        ret = -1;
                        break;
                }        
                ret = 0;
                break;
        default:
                printk("...illegal ioctl cmd\n");
                ret = -1;
                break;
        }
        return ret;
}

static int chkpt_save(struct chkpt_ioctl *chkptio) {
        struct task_struct *tsk;
        struct chkpt chkpt;
        unsigned int datasz, brksz, stacksz;
        struct file *f;

        if (!(tsk = find_task_by_pid(chkptio->pid))) {
                printk("...task %d not found\n", chkptio->pid);
                return -1;
        }

        f = current->files->fd[chkptio->file];

        datasz = tsk->mm->end_data - tsk->mm->start_data; // data
        brksz = tsk->mm->brk - tsk->mm->start_brk; // brk
        stacksz = tsk->thread.esp0 - tsk->thread.esp; // stack

        /* saving most important information belonging to tsk */
        /* NO FILES, SOCKETS, PIPES, SHARED MEMORY AND SEMAPHORES */
        chkpt.pid = chkptio->pid;
        /* REGISTERS */
        memcpy(&chkpt.regs, REGS, sizeof(struct pt_regs));
        if (in_syscall(tsk))
                intr_syscall(&chkpt.regs);
        chkpt.datasize = datasz;
        chkpt.brksize = brksz;
        chkpt.stacksize = stacksz;
        pack_write(f, (void *)&chkpt, sizeof(struct chkpt), 0);
        /* TASK */
        pack_write(f, (void*)tsk, THREAD_SIZE, 0);
        /* MEMORY */
        pack_write(f, (void *)tsk->mm->start_data, datasz, 0);
        pack_write(f, (void *)tsk->mm->start_brk, brksz, 0);
        pack_write(f, (void *)tsk->thread.esp, stacksz, 0);

        pack_write(f, NULL, 0, 1); /* last packet */

        return 0;
}

static int pack_write (struct file *f, char *buf, int len,
		       int last_pkt) {
    static char *pack = NULL;
    static long pos = 0;
    int ret, to_copy, wrtn = 0;
    
    if (pack==NULL)
    {
	pack=(char*)kmalloc(PACKET_SIZE, GFP_KERNEL);
	if (!pack)
	{
	    printk("pack_write: no mem!\n");
	    return -1;
	}
    }
    
    while (len>0)
    {
	to_copy = (len>(PACKET_SIZE-pos))?(PACKET_SIZE-pos):(len);
	
        memcpy(&(pack[pos]), buf+wrtn, to_copy);
	
	pos += to_copy;
	len -= to_copy;
	wrtn +=to_copy; 
	
	if ( (pos==PACKET_SIZE) || (last_pkt) )
	{
	    mm_segment_t fs = get_fs();
	    
	    set_fs(KERNEL_DS);
	    ret = f->f_op->write(f, pack, pos, &(f->f_pos));	
	    set_fs(fs);
	    if (ret!=pos)
		return ret;
	    
	    pos = 0;
	    if (last_pkt)
	    {
		kfree(pack);
		pack = NULL;
	    }
	}
    }
    
    if ( (last_pkt) && (pack!=NULL) )
    {
	if (pos!=0)
	{
	    mm_segment_t fs = get_fs();
	    
	    set_fs(KERNEL_DS);
	    wrtn = f->f_op->write(f, pack, pos, &f->f_pos);
	    set_fs(fs);
	}
	kfree(pack);
	pack = NULL;
	pos = 0;
    }
    
    return wrtn;
}

static int in_syscall(struct task_struct *tsk) {
        unsigned char ins, opc;
        long ret;
        unsigned long flags;
        struct pt_regs *regs;

        spin_lock_irqsave(&runqueue_lock, flags);

        regs = (((struct pt_regs *)(THREAD_SIZE + (unsigned long)tsk)) - 
1);

        get_user(ins, (unsigned char *)(regs->eip) - 2);
        get_user(opc, (unsigned char *)(regs->eip) - 1);

        if ((ins == 0xCD) && (opc == 0x80)) {
                ret = regs->orig_eax;
        } else {
                ret = 0;
        }
        
        if (ret && ((regs->orig_eax < 0) || (regs->orig_eax > 
NR_syscalls))) {
                INFO(("syscall number out of bounds %ld\n", 
regs->orig_eax));
                ret = 0;
        }
        
        spin_unlock_irqrestore(&runqueue_lock, flags);

        return ret;
}

static int intr_syscall(struct pt_regs *regs) {
        
        /* handle in_syscall depending on syscall number */
        switch(regs->orig_eax) {
        case 4: /* write */
                /* report interuption */
                regs->eax = -EINTR;
                break;
        default:
                /* restart */
                regs->eax = regs->orig_eax;
                regs->eip -= 2;
                break;
        }

        return regs->orig_eax;
}

static int chkpt_restore(struct chkpt_ioctl *chkptio) {
        struct task_struct *tsk, *saved_tsk;
        struct chkpt chkpt;
        struct file *f;

        if ((tsk = find_task_by_pid(chkptio->pid)) == NULL) {
                printk("...failed task not found %u\n", chkptio->pid);
                return -1;
        }


        if ((saved_tsk = kmalloc(sizeof(struct task_struct), GFP_KERNEL)) 
== NULL) {
                printk("kmalloc failed\n");
                return -1;
        }

        f = current->files->fd[chkptio->file];
        do_read(f, &f->f_pos, (void *)&chkpt, sizeof(struct chkpt));
        do_read(f, &f->f_pos, (void *)saved_tsk, THREAD_SIZE);

        /* TASK */
        if (pm_overwrite(tsk, saved_tsk) == NULL) {
                printk("...failed task overwrite\n");
                return -1;
        }
        /* REGISTERS */
        memcpy(REGS, &chkpt.regs, sizeof(struct pt_regs));
        /* MEMORY */
        do_read(f, &f->f_pos, (void *)tsk->mm->start_data, 
chkpt.datasize);
        do_read(f, &f->f_pos, (void *)chkpt.regs.esp, chkpt.stacksize);
        do_read(f, &f->f_pos, (void *)tsk->mm->start_brk, chkpt.brksize);
        kfree(saved_tsk);

        return 0;
}


static int inline do_read(struct file *file, loff_t *offset,
			  char * addr, unsigned long count) {
    mm_segment_t old_fs;
    int ret;
    
    if (!file->f_op->read)
	return -ENOSYS;
    old_fs = get_fs();
    set_fs(get_ds());
    ret = file->f_op->read(file, addr, count, offset);
    set_fs(old_fs);
    return ret;
}

/* overwrite tsk with the one in pmio, making a backup first,
   then restoring all values dependant on the current local
   OS state (except register values) */
static struct task_struct *pm_overwrite(struct task_struct *tsk,
                                        struct task_struct *saved_tsk) {
	struct task_struct *backup;

        if ((backup = kmalloc(sizeof(*backup), GFP_KERNEL)) == NULL) {
                printk("kmalloc failed\n");
                return NULL;
        }

        memcpy(backup, tsk, sizeof(struct task_struct));

        write_lock_irq(&tasklist_lock);
        
        memcpy(tsk, saved_tsk, THREAD_SIZE);

        tsk->next_task = backup->next_task;
        tsk->prev_task = backup->prev_task;

        tsk->exec_domain = backup->exec_domain;
        tsk->binfmt      = backup->binfmt;

        tsk->pid          = backup->pid;
        tsk->pgrp         = backup->pgrp;
        tsk->tty_old_pgrp = backup->tty_old_pgrp;
        tsk->session      = backup->session;
        tsk->tgid         = backup->tgid;
        tsk->leader       = backup->leader;

        tsk->p_opptr = backup->p_opptr;
        tsk->p_pptr  = backup->p_pptr;
        tsk->p_cptr  = backup->p_cptr;
        tsk->p_ysptr = backup->p_ysptr;
        tsk->p_osptr = backup->p_osptr;

        tsk->thread_group = backup->thread_group;

        tsk->pidhash_next  = backup->pidhash_next;
        tsk->pidhash_pprev = backup->pidhash_pprev;

        tsk->real_timer    = backup->real_timer;

        tsk->uid   = backup->uid;
        tsk->euid  = backup->euid;
        tsk->suid  = backup->suid;
        tsk->fsuid = backup->fsuid;
        tsk->gid   = backup->gid;
        tsk->egid  = backup->egid;
        tsk->sgid  = backup->sgid;
        tsk->fsgid = backup->fsgid;

        tsk->ngroups = backup->ngroups;
        memcpy(tsk->groups, backup->groups, sizeof(tsk->groups));

        tsk->cap_effective     = backup->cap_effective;
        tsk->cap_inheritable   = backup->cap_inheritable;
        tsk->cap_permitted     = backup->cap_permitted;
        tsk->keep_capabilities = backup->keep_capabilities;

        tsk->user = backup->user;

        tsk->tty = backup->tty;

        tsk->semundo     = backup->semundo;
        tsk->semsleeping = backup->semsleeping;

        tsk->fs = backup->fs;
        
        tsk->files = backup->files;

        tsk->sigmask_lock = backup->sigmask_lock;
        tsk->sig          = backup->sig;
        tsk->pending      = backup->pending;

        tsk->sas_ss_sp     = backup->sas_ss_sp;
        tsk->sas_ss_size   = backup->sas_ss_size;
        tsk->notifier      = backup->notifier;
        tsk->notifier_data = backup->notifier_data;
        tsk->notifier_mask = backup->notifier_mask;

        tsk->parent_exec_id = backup->parent_exec_id;
        tsk->self_exec_id   = backup->self_exec_id;

        tsk->alloc_lock = backup->alloc_lock;

        tsk->mm        = backup->mm;
        tsk->active_mm = backup->active_mm;

        write_unlock_irq(&tasklist_lock);
        kfree(backup);
        return tsk;
}

/* standard module stuff */

int __init chkpt_init(void)
{
	if (register_chrdev(CHKPT_DEV_MAJOR, "chkpt", &file_ops)) {
		printk("...failed register_chrdev\n");
		return -1;
	}
	return 0;
}

void __exit chkpt_exit(void)
{
	unregister_chrdev(CHKPT_DEV_MAJOR, "chkpt");
	return;
}

EXPORT_SYMBOL(chkpt_init);
EXPORT_SYMBOL(chkpt_exit);
EXPORT_SYMBOL(chkpt_ioctl_handler);

if anyone has any ideas, please let me know. thanx in advance.

greetings

marks
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ