lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080821100736.GI581@hawkmoon.kerlabs.com>
Date:	Thu, 21 Aug 2008 12:07:36 +0200
From:	Louis Rilling <Louis.Rilling@...labs.com>
To:	Oren Laadan <orenl@...columbia.edu>
Cc:	dave@...ux.vnet.ibm.com, arnd@...db.de, jeremy@...p.org,
	linux-kernel@...r.kernel.org, containers@...ts.linux-foundation.org
Subject: Re: [RFC v2][PATCH 5/9] Memory management - restore state

On Wed, Aug 20, 2008 at 11:05:39PM -0400, Oren Laadan wrote:
>
> Restoring the memory address space begins with nuking the existing one
> of the current process, and then reading the VMA state and contents.
> Call do_mmap_pgoff() for each VMA and then read in the data.

[...]

> diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c
> new file mode 100644
> index 0000000..df602a9
> --- /dev/null
> +++ b/checkpoint/rstr_mem.c

[...]

> +static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
> +{
> +	struct cr_hdr_vma *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	unsigned long vm_size, vm_flags, vm_prot, vm_pgoff;
> +	unsigned long addr;
> +	unsigned long flags;
> +	struct file *file = NULL;
> +	char *fname = NULL;
> +	int ret;
> +
> +	ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_VMA);
> +	if (ret < 0)
> +		return ret;
> +	else if (ret != 0)
> +		return -EINVAL;
> +
> +	cr_debug("vma %#lx-%#lx npages %d\n", (unsigned long) hh->vm_start,
> +		 (unsigned long) hh->vm_end, (int) hh->npages);
> +
> +	if (hh->vm_end < hh->vm_start || hh->npages < 0)
> +		return -EINVAL;
> +
> +	vm_size = hh->vm_end - hh->vm_start;
> +	vm_prot = cr_calc_map_prot_bits(hh->vm_flags);
> +	vm_flags = cr_calc_map_flags_bits(hh->vm_flags);
> +	vm_pgoff = hh->vm_pgoff;
> +
> +	if (hh->fname) {
> +		fname = ctx->tbuf;
> +		ret = cr_read_str(ctx, fname, PAGE_SIZE);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	cr_debug("vma fname '%s' how %d\n", fname, hh->how);
> +
> +	switch (hh->how) {
> +
> +	case CR_VMA_ANON:		/* anonymous private mapping */
> +		if (hh->fname)
> +			return -EINVAL;
> +		/* vm_pgoff for anonymous mapping is the "global" page
> +		   offset (namely from addr 0x0), so we force a zero */
> +		vm_pgoff = 0;
> +		break;
> +
> +	case CR_VMA_FILE:		/* private mapping from a file */
> +		if (!hh->fname)
> +			return -EINVAL;
> +		/* O_RDWR only needed if both (VM_WRITE|VM_SHARED) are set */
> +		flags = hh->vm_flags & (VM_WRITE | VM_SHARED);
> +		flags = (flags == (VM_WRITE | VM_SHARED) ? O_RDWR : O_RDONLY);
> +		file = filp_open(fname, flags, 0);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +
> +	}
> +
> +	addr = do_mmap_pgoff(file, (unsigned long) hh->vm_start,
> +			     vm_size, vm_prot, vm_flags, vm_pgoff);
> +	cr_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
> +		 vm_size, vm_prot, vm_flags, vm_pgoff, addr);
> +
> +	/* the file (if opened) is now referenced by the vma */
> +	if (file)
> +		filp_close(file, NULL);
> +
> +	if (IS_ERR((void*) addr))
> +		return (PTR_ERR((void *) addr));
> +
> +	/*
> +	 * CR_VMA_ANON: read in memory as is
> +	 * CR_VMA_FILE: read in memory as is
> +	 * (more to follow ...)
> +	 */
> +
> +	switch (hh->how) {
> +	case CR_VMA_ANON:
> +	case CR_VMA_FILE:
> +		/* standard case: read the data into the memory */
> +		ret = cr_vma_read_pages(ctx, hh);
> +		break;
> +	}
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	if (vm_prot & PROT_EXEC)
> +		flush_icache_range(hh->vm_start, hh->vm_end);
> +
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	cr_debug("vma retval %d\n", ret);
> +	return 0;
> +}
> +
> +static int cr_destroy_mm(struct mm_struct *mm)
> +{
> +	struct vm_area_struct *vmnext = mm->mmap;
> +	struct vm_area_struct *vma;
> +	int ret;
> +
> +	while (vmnext) {
> +		vma = vmnext;
> +		vmnext = vmnext->vm_next;
> +		ret = do_munmap(mm, vma->vm_start, vma->vm_end-vma->vm_start);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	return 0;
> +}
> +
> +int cr_read_mm(struct cr_ctx *ctx)
> +{
> +	struct cr_hdr_mm *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct mm_struct *mm;
> +	int nr, ret;
> +
> +	ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM);
> +	if (ret < 0)
> +		return ret;
> +#if 0	/* activate when containers are used */
> +	if (ret != task_pid_vnr(current))
> +		return -EINVAL;
> +#endif
> +	cr_debug("map_count %d\n", hh->map_count);
> +
> +	/* XXX need more sanity checks */
> +	if (hh->start_code > hh->end_code ||
> +	    hh->start_data > hh->end_data || hh->map_count < 0)
> +		return -EINVAL;
> +
> +	mm = current->mm;
> +
> +	/* point of no return -- destruct current mm */
> +	down_write(&mm->mmap_sem);
> +	ret = cr_destroy_mm(mm);
> +	up_write(&mm->mmap_sem);
> +
> +	if (ret < 0)
> +		return ret;
> +

You should take down_write(&mm->mmap_sem) again here, and hold it until all vmas
are restored. This means removing the down_write() from cr_vma_writable(). Or
perhaps make the locking finer-grained: release it before looping over the vmas,
and make cr_read_vma() take it again before calling do_mmap_pgoff().

> +	mm->start_code = hh->start_code;
> +	mm->end_code = hh->end_code;
> +	mm->start_data = hh->start_data;
> +	mm->end_data = hh->end_data;
> +	mm->start_brk = hh->start_brk;
> +	mm->brk = hh->brk;
> +	mm->start_stack = hh->start_stack;
> +	mm->arg_start = hh->arg_start;
> +	mm->arg_end = hh->arg_end;
> +	mm->env_start = hh->env_start;
> +	mm->env_end = hh->env_end;
> +
> +	/* FIX: need also mm->flags */
> +
> +	for (nr = hh->map_count; nr; nr--) {
> +		ret = cr_read_vma(ctx, mm);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	ret = cr_read_mm_context(ctx, mm, hh->tag);
> +
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	return ret;
> +}

Thanks,

Louis

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes

Download attachment "signature.asc" of type "application/pgp-signature" (190 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ