linux-kernel - [PATCH 10/10] BC: user memory accounting (hooks)

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <45252C74.4080809@openvz.org>
Date:	Thu, 05 Oct 2006 20:01:56 +0400
From:	Kirill Korotaev <dev@...nvz.org>
To:	Andrew Morton <akpm@...l.org>
CC:	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Alan Cox <alan@...rguk.ukuu.org.uk>,
	Christoph Hellwig <hch@...radead.org>, xemul@...nvz.org,
	Andrey Savochkin <saw@...ru>, devel@...nvz.org,
	Rik van Riel <riel@...hat.com>, Andi Kleen <ak@...e.de>,
	Oleg Nesterov <oleg@...sign.ru>,
	Matt Helsley <matthltc@...ibm.com>,
	CKRM-Tech <ckrm-tech@...ts.sourceforge.net>,
	Hugh Dickins <hugh@...itas.com>, Srivatsa <vatsa@...ibm.com>,
	Balbir Singh <balbir@...ibm.com>, haveblue@...ibm.com
Subject: [PATCH 10/10] BC: user memory accounting (hooks)

Install BC hooks in appropriate places for user memory accounting.

Signed-off-by: Pavel Emelianov <xemul@...nvz.org>
Signed-off-by: Kirill Korotaev <dev@...nvz.org>

---

 fs/binfmt_elf.c            |    5 +--
 fs/exec.c                  |   14 +++++++-
 include/asm-alpha/mman.h   |    1 
 include/asm-generic/mman.h |    1 
 include/asm-mips/mman.h    |    1 
 include/asm-parisc/mman.h  |    1 
 include/asm-xtensa/mman.h  |    1 
 kernel/fork.c              |   11 ++++++
 mm/fremap.c                |   10 ++++++
 mm/memory.c                |   40 +++++++++++++++++++++++-
 mm/migrate.c               |   10 ++++++
 mm/mlock.c                 |   16 +++++++++
 mm/mmap.c                  |   74 ++++++++++++++++++++++++++++++++++++++-------
 mm/mprotect.c              |   18 ++++++++++
 mm/mremap.c                |   22 ++++++++++---
 mm/rmap.c                  |    3 +
 mm/shmem.c                 |   15 +++++++++
 17 files changed, 219 insertions(+), 24 deletions(-)

--- ./fs/binfmt_elf.c.bc_userpages	2006-10-05 11:42:42.000000000 +0400
+++ ./fs/binfmt_elf.c	2006-10-05 12:44:20.000000000 +0400
@@ -360,7 +360,7 @@ static unsigned long load_elf_interp(str
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
-			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+			int elf_type = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECPRIO;
 			int elf_prot = 0;
 			unsigned long vaddr = 0;
 			unsigned long k, map_addr;
@@ -846,7 +846,8 @@ static int load_elf_binary(struct linux_
 		if (elf_ppnt->p_flags & PF_X)
 			elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE |
+				MAP_EXECPRIO;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
--- ./fs/exec.c.bc_userpages	2006-10-05 11:42:42.000000000 +0400
+++ ./fs/exec.c	2006-10-05 12:44:20.000000000 +0400
@@ -50,6 +50,8 @@
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 
+#include <bc/rsspages.h>
+
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
@@ -308,27 +310,35 @@ void install_arg_page(struct vm_area_str
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t * pte;
 	spinlock_t *ptl;
+	struct page_beancounter *pc;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto out;
 
+	if (bc_rsspage_prepare(page, vma, &pc))
+		goto out;
+
 	flush_dcache_page(page);
 	pte = get_locked_pte(mm, address, &ptl);
 	if (!pte)
-		goto out;
+		goto out_unch;
 	if (!pte_none(*pte)) {
 		pte_unmap_unlock(pte, ptl);
-		goto out;
+		goto out_unch;
 	}
 	inc_mm_counter(mm, anon_rss);
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
 	page_add_new_anon_rmap(page, vma, address);
+	bc_rsspage_charge(pc);
 	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
 	return;
+
+out_unch:
+	bc_rsspage_release(pc);
 out:
 	__free_page(page);
 	force_sig(SIGKILL, current);
--- ./include/asm-alpha/mman.h.bc_userpages	2006-09-20 14:46:38.000000000 +0400
+++ ./include/asm-alpha/mman.h	2006-10-05 12:44:20.000000000 +0400
@@ -14,6 +14,7 @@
 #define MAP_TYPE	0x0f		/* Mask for type of mapping (OSF/1 is _wrong_) */
 #define MAP_FIXED	0x100		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x10		/* don't use a file */
+#define MAP_EXECPRIO	0x20		/* charge with BC_LIMIT severity */
 
 /* not used by linux, but here to make sure we don't clash with OSF/1 defines */
 #define _MAP_HASSEMAPHORE 0x0200
--- ./include/asm-generic/mman.h.bc_userpages	2006-09-20 14:46:38.000000000 +0400
+++ ./include/asm-generic/mman.h	2006-10-05 12:44:20.000000000 +0400
@@ -19,6 +19,7 @@
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
+#define MAP_EXECPRIO	0x2000		/* charge with BC_LIMIT severity */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_INVALIDATE	2		/* invalidate the caches */
--- ./include/asm-mips/mman.h.bc_userpages	2006-09-20 14:46:39.000000000 +0400
+++ ./include/asm-mips/mman.h	2006-10-05 12:44:20.000000000 +0400
@@ -46,6 +46,7 @@
 #define MAP_LOCKED	0x8000		/* pages are locked */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_EXECPRIO	0x40000		/* charge with BC_LIMIT severity */
 
 /*
  * Flags for msync
--- ./include/asm-parisc/mman.h.bc_userpages	2006-09-20 14:46:39.000000000 +0400
+++ ./include/asm-parisc/mman.h	2006-10-05 12:44:20.000000000 +0400
@@ -22,6 +22,7 @@
 #define MAP_GROWSDOWN	0x8000		/* stack-like segment */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_EXECPRIO	0x40000		/* charge with BC_LIMIT severity */
 
 #define MS_SYNC		1		/* synchronous memory sync */
 #define MS_ASYNC	2		/* sync memory asynchronously */
--- ./include/asm-xtensa/mman.h.bc_userpages	2006-09-20 14:46:40.000000000 +0400
+++ ./include/asm-xtensa/mman.h	2006-10-05 12:44:20.000000000 +0400
@@ -53,6 +53,7 @@
 #define MAP_LOCKED	0x8000		/* pages are locked */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_EXECPRIO	0x40000		/* charge with BC_LIMIT severity */
 
 /*
  * Flags for msync
--- ./kernel/fork.c.bc_userpages	2006-10-05 12:14:35.000000000 +0400
+++ ./kernel/fork.c	2006-10-05 12:44:20.000000000 +0400
@@ -50,6 +50,7 @@
 #include <linux/random.h>
 
 #include <bc/task.h>
+#include <bc/vmpages.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -252,6 +253,9 @@ static inline int dup_mmap(struct mm_str
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
+		vma_set_bc(tmp);
+		if (bc_vma_charge(tmp))
+			goto fail_charge;
 		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
@@ -294,6 +298,10 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+
+fail_charge:
+	mpol_free(pol);
+	vma_release_bc(tmp);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
@@ -344,6 +352,7 @@ static struct mm_struct * mm_init(struct
 	mm->cached_hole_size = ~0UL;
 
 	if (likely(!mm_alloc_pgd(mm))) {
+		mm_init_beancounter(mm);
 		mm->def_flags = 0;
 		return mm;
 	}
@@ -374,6 +383,7 @@ struct mm_struct * mm_alloc(void)
 void fastcall __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
+	mm_free_beancounter(mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	free_mm(mm);
@@ -508,6 +518,7 @@ fail_nocontext:
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
 	 */
+	mm_free_beancounter(mm);
 	mm_free_pgd(mm);
 	free_mm(mm);
 	return NULL;
--- ./mm/fremap.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/fremap.c	2006-10-05 12:44:20.000000000 +0400
@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 
+#include <bc/rsspages.h>
+
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -57,6 +59,10 @@ int install_page(struct mm_struct *mm, s
 	pte_t *pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct page_beancounter *pc;
+
+	if (bc_rsspage_prepare(page, vma, &pc))
+		goto out_nocharge;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
@@ -84,10 +90,14 @@ int install_page(struct mm_struct *mm, s
 	page_add_file_rmap(page);
 	update_mmu_cache(vma, addr, pte_val);
 	lazy_mmu_prot_update(pte_val);
+	bc_rsspage_charge(pc);
 	err = 0;
 unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
+	if (err != 0)
+		bc_rsspage_release(pc);
+out_nocharge:
 	return err;
 }
 EXPORT_SYMBOL(install_page);
--- ./mm/memory.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/memory.c	2006-10-05 12:44:20.000000000 +0400
@@ -57,6 +57,8 @@
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
+#include <bc/rsspages.h>
+
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
@@ -1483,6 +1485,7 @@ static int do_wp_page(struct mm_struct *
 	pte_t entry;
 	int reuse = 0, ret = VM_FAULT_MINOR;
 	struct page *dirty_page = NULL;
+	struct page_beancounter *pc;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
@@ -1568,6 +1571,9 @@ gotten:
 		cow_user_page(new_page, old_page, address);
 	}
 
+	if (bc_rsspage_prepare(new_page, vma, &pc))
+		goto oom;
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
@@ -1596,11 +1602,14 @@ gotten:
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
+		bc_rsspage_charge(pc);
 
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
-	}
+	} else
+		bc_rsspage_release(pc);
+
 	if (new_page)
 		page_cache_release(new_page);
 	if (old_page)
@@ -1977,6 +1986,7 @@ static int do_swap_page(struct mm_struct
 	swp_entry_t entry;
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pc;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		goto out;
@@ -2009,6 +2019,11 @@ static int do_swap_page(struct mm_struct
 		count_vm_event(PGMAJFAULT);
 	}
 
+	if (bc_rsspage_prepare(page, vma, &pc)) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	mark_page_accessed(page);
 	lock_page(page);
@@ -2022,6 +2037,7 @@ static int do_swap_page(struct mm_struct
 
 	if (unlikely(!PageUptodate(page))) {
 		ret = VM_FAULT_SIGBUS;
+		bc_rsspage_uncharge(page);
 		goto out_nomap;
 	}
 
@@ -2037,6 +2053,7 @@ static int do_swap_page(struct mm_struct
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	bc_rsspage_charge(pc);
 
 	swap_free(entry);
 	if (vm_swap_full())
@@ -2058,6 +2075,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
+	bc_rsspage_release(pc);
 	pte_unmap_unlock(page_table, ptl);
 	unlock_page(page);
 	page_cache_release(page);
@@ -2076,6 +2094,7 @@ static int do_anonymous_page(struct mm_s
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
+	struct page_beancounter *pc;
 
 	if (write_access) {
 		/* Allocate our own private page. */
@@ -2087,15 +2106,22 @@ static int do_anonymous_page(struct mm_s
 		if (!page)
 			goto oom;
 
+		if (bc_rsspage_prepare(page, vma, &pc))
+			goto oom_release;
+
 		entry = mk_pte(page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-		if (!pte_none(*page_table))
+		if (!pte_none(*page_table)) {
+			bc_rsspage_release(pc);
 			goto release;
+		}
+
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		page_add_new_anon_rmap(page, vma, address);
+		bc_rsspage_charge(pc);
 	} else {
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
@@ -2121,6 +2147,9 @@ unlock:
 release:
 	page_cache_release(page);
 	goto unlock;
+
+oom_release:
+	page_cache_release(page);
 oom:
 	return VM_FAULT_OOM;
 }
@@ -2150,6 +2179,7 @@ static int do_no_page(struct mm_struct *
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
 	struct page *dirty_page = NULL;
+	struct page_beancounter *pc;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2205,6 +2235,9 @@ retry:
 		}
 	}
 
+	if (bc_rsspage_prepare(new_page, vma, &pc))
+		goto oom;
+
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	/*
 	 * For a file-backed vma, someone could have truncated or otherwise
@@ -2213,6 +2246,7 @@ retry:
 	 */
 	if (mapping && unlikely(sequence != mapping->truncate_count)) {
 		pte_unmap_unlock(page_table, ptl);
+		bc_rsspage_release(pc);
 		page_cache_release(new_page);
 		cond_resched();
 		sequence = mapping->truncate_count;
@@ -2249,8 +2283,10 @@ retry:
 				get_page(dirty_page);
 			}
 		}
+		bc_rsspage_charge(pc);
 	} else {
 		/* One of our sibling threads was faster, back out. */
+		bc_rsspage_release(pc);
 		page_cache_release(new_page);
 		goto unlock;
 	}
--- ./mm/migrate.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/migrate.c	2006-10-05 12:44:20.000000000 +0400
@@ -134,6 +134,7 @@ static void remove_migration_pte(struct 
 	pte_t *ptep, pte;
  	spinlock_t *ptl;
 	unsigned long addr = page_address_in_vma(new, vma);
+	struct page_beancounter *pc;
 
 	if (addr == -EFAULT)
 		return;
@@ -157,6 +158,11 @@ static void remove_migration_pte(struct 
  		return;
  	}
 
+	if (bc_rsspage_prepare(new, vma, &pc)) {
+		pte_unmap(ptep);
+		return;
+	}
+
  	ptl = pte_lockptr(mm, pmd);
  	spin_lock(ptl);
 	pte = *ptep;
@@ -178,13 +184,17 @@ static void remove_migration_pte(struct 
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
+	bc_rsspage_charge(pc);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, pte);
 	lazy_mmu_prot_update(pte);
+	pte_unmap_unlock(ptep, ptl);
+	return;
 
 out:
 	pte_unmap_unlock(ptep, ptl);
+	bc_rsspage_release(pc);
 }
 
 /*
--- ./mm/mlock.c.bc_userpages	2006-09-20 14:46:41.000000000 +0400
+++ ./mm/mlock.c	2006-10-05 12:44:20.000000000 +0400
@@ -11,6 +11,7 @@
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 
+#include <bc/vmpages.h>
 
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
@@ -19,12 +20,21 @@ static int mlock_fixup(struct vm_area_st
 	pgoff_t pgoff;
 	int pages;
 	int ret = 0;
-
+	int bc_recharge;
+	
+	bc_recharge = BC_NOCHARGE;
 	if (newflags == vma->vm_flags) {
 		*prev = vma;
 		goto out;
 	}
 
+	bc_recharge = bc_need_memory_recharge(vma, vma->vm_file, newflags);
+	if (bc_recharge == BC_CHARGE) {
+		ret = __bc_memory_charge(mm, end - start, BC_BARRIER);
+		if (ret < 0)
+			goto out;
+	}
+
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
@@ -48,6 +58,8 @@ static int mlock_fixup(struct vm_area_st
 	}
 
 success:
+	if (bc_recharge == BC_UNCHARGE)
+		__bc_memory_uncharge(mm, end - start);
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 * It's okay if try_to_unmap_one unmaps a page just after we
@@ -67,6 +79,8 @@ success:
 
 	vma->vm_mm->locked_vm -= pages;
 out:
+	if (ret < 0 && bc_recharge == BC_CHARGE)
+		__bc_memory_uncharge(mm, end - start);
 	if (ret == -ENOMEM)
 		ret = -EAGAIN;
 	return ret;
--- ./mm/mmap.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/mmap.c	2006-10-05 12:44:20.000000000 +0400
@@ -26,6 +26,8 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 
+#include <bc/vmpages.h>
+
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
@@ -37,6 +39,7 @@
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int prio);
 
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
@@ -224,6 +227,8 @@ static struct vm_area_struct *remove_vma
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
+
+	bc_vma_uncharge(vma);
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
@@ -271,7 +276,7 @@ asmlinkage unsigned long sys_brk(unsigne
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+	if (__do_brk(oldbrk, newbrk-oldbrk, BC_BARRIER) != oldbrk)
 		goto out;
 set_brk:
 	mm->brk = brk;
@@ -620,6 +625,7 @@ again:			remove_next = 1 + (end > next->
 			fput(file);
 		mm->map_count--;
 		mpol_free(vma_policy(next));
+		vma_release_bc(next);
 		kmem_cache_free(vm_area_cachep, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
@@ -761,15 +767,17 @@ struct vm_area_struct *vma_merge(struct 
 	 */
 	if (prev && prev->vm_end == addr &&
   			mpol_equal(vma_policy(prev), policy) &&
+			bc_equal(mm->mm_bc, prev->vma_bc) &&
 			can_vma_merge_after(prev, vm_flags,
-						anon_vma, file, pgoff)) {
+				anon_vma, file, pgoff)) {
 		/*
 		 * OK, it can.  Can we now merge in the successor as well?
 		 */
 		if (next && end == next->vm_start &&
 				mpol_equal(policy, vma_policy(next)) &&
+				bc_equal(mm->mm_bc, next->vma_bc) &&
 				can_vma_merge_before(next, vm_flags,
-					anon_vma, file, pgoff+pglen) &&
+					anon_vma, file, pgoff + pglen) &&
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
@@ -786,8 +794,9 @@ struct vm_area_struct *vma_merge(struct 
 	 */
 	if (next && end == next->vm_start &&
  			mpol_equal(policy, vma_policy(next)) &&
+			bc_equal(mm->mm_bc, next->vma_bc) &&
 			can_vma_merge_before(next, vm_flags,
-					anon_vma, file, pgoff+pglen)) {
+					anon_vma, file, pgoff + pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
 			vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
@@ -828,6 +837,7 @@ struct anon_vma *find_mergeable_anon_vma
 
 	if (near->anon_vma && vma->vm_end == near->vm_start &&
  			mpol_equal(vma_policy(vma), vma_policy(near)) &&
+			bc_equal(vma->vma_bc, near->vma_bc) &&
 			can_vma_merge_before(near, vm_flags,
 				NULL, vma->vm_file, vma->vm_pgoff +
 				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
@@ -849,6 +859,7 @@ try_prev:
 
 	if (near->anon_vma && near->vm_end == vma->vm_start &&
   			mpol_equal(vma_policy(near), vma_policy(vma)) &&
+			bc_equal(vma->vma_bc, near->vma_bc) &&
 			can_vma_merge_after(near, vm_flags,
 				NULL, vma->vm_file, vma->vm_pgoff))
 		return near->anon_vma;
@@ -1055,6 +1066,10 @@ munmap_back:
 		}
 	}
 
+	if (bc_memory_charge(mm, len, file, vm_flags,
+				flags & MAP_EXECPRIO ? BC_LIMIT : BC_BARRIER))
+		goto charge_error;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -1083,6 +1098,7 @@ munmap_back:
 	vma->vm_page_prot = protection_map[vm_flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
 	vma->vm_pgoff = pgoff;
+	vma_set_bc(vma);
 
 	if (file) {
 		error = -EINVAL;
@@ -1139,6 +1155,7 @@ munmap_back:
 			fput(file);
 		}
 		mpol_free(vma_policy(vma));
+		vma_release_bc(vma);
 		kmem_cache_free(vm_area_cachep, vma);
 	}
 out:	
@@ -1168,6 +1185,8 @@ unmap_and_free_vma:
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
+	bc_memory_uncharge(mm, len, file, vm_flags);
+charge_error:
 	if (charged)
 		vm_unacct_memory(charged);
 	return error;
@@ -1497,12 +1516,16 @@ static int acct_stack_growth(struct vm_a
 			return -ENOMEM;
 	}
 
+	if (bc_memory_charge(mm, grow << PAGE_SHIFT,
+				vma->vm_file, vma->vm_flags, BC_LIMIT))
+		goto out_charge;
+
 	/*
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
 	if (security_vm_enough_memory(grow))
-		return -ENOMEM;
+		goto out_sec;
 
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
@@ -1510,6 +1533,11 @@ static int acct_stack_growth(struct vm_a
 		mm->locked_vm += grow;
 	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
+
+out_sec:
+	bc_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_file, vma->vm_flags);
+out_charge:
+	return -ENOMEM;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1743,6 +1771,7 @@ int split_vma(struct mm_struct * mm, str
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
+	vma_copy_bc(new);
 
 	if (new_below)
 		new->vm_end = addr;
@@ -1753,6 +1782,7 @@ int split_vma(struct mm_struct * mm, str
 
 	pol = mpol_copy(vma_policy(vma));
 	if (IS_ERR(pol)) {
+		vma_release_bc(new);
 		kmem_cache_free(vm_area_cachep, new);
 		return PTR_ERR(pol);
 	}
@@ -1865,7 +1895,8 @@ static inline void verify_mm_writelocked
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long __do_brk(unsigned long addr, unsigned long len,
+		int bc_prio)
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
@@ -1925,7 +1956,10 @@ unsigned long do_brk(unsigned long addr,
 		return -ENOMEM;
 
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
-		return -ENOMEM;
+		goto err_sec;
+
+	if (bc_memory_charge(mm, len, NULL, flags, bc_prio))
+		goto err_ch;
 
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
@@ -1936,10 +1970,8 @@ unsigned long do_brk(unsigned long addr,
 	 * create a vma struct for an anonymous mapping
 	 */
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma) {
-		vm_unacct_memory(len >> PAGE_SHIFT);
-		return -ENOMEM;
-	}
+	if (!vma)
+		goto err_alloc;
 
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
@@ -1948,6 +1980,7 @@ unsigned long do_brk(unsigned long addr,
 	vma->vm_flags = flags;
 	vma->vm_page_prot = protection_map[flags &
 				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma_set_bc(vma);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -1956,8 +1989,19 @@ out:
 		make_pages_present(addr, addr + len);
 	}
 	return addr;
+
+err_alloc:
+	bc_memory_uncharge(mm, len, NULL, flags);
+err_ch:
+	vm_unacct_memory(len >> PAGE_SHIFT);
+err_sec:
+	return -ENOMEM;
 }
 
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	return __do_brk(addr, len, BC_LIMIT);
+}
 EXPORT_SYMBOL(do_brk);
 
 /* Release all mmaps. */
@@ -2019,6 +2063,12 @@ int insert_vm_struct(struct mm_struct * 
 	if ((vma->vm_flags & VM_ACCOUNT) &&
 	     security_vm_enough_memory(vma_pages(vma)))
 		return -ENOMEM;
+
+	if (bc_vma_charge(vma)) {
+		if (vma->vm_flags & VM_ACCOUNT)
+			vm_unacct_memory(vma_pages(vma));
+		return -ENOMEM;
+	}
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	return 0;
 }
@@ -2058,8 +2108,10 @@ struct vm_area_struct *copy_vma(struct v
 		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 		if (new_vma) {
 			*new_vma = *vma;
+			vma_copy_bc(new_vma);
 			pol = mpol_copy(vma_policy(vma));
 			if (IS_ERR(pol)) {
+				vma_release_bc(new_vma);
 				kmem_cache_free(vm_area_cachep, new_vma);
 				return NULL;
 			}
--- ./mm/mprotect.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/mprotect.c	2006-10-05 12:44:20.000000000 +0400
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <bc/vmpages.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -140,12 +141,20 @@ mprotect_fixup(struct vm_area_struct *vm
 	pgoff_t pgoff;
 	int error;
 	int dirty_accountable = 0;
+	int bc_recharge;
 
 	if (newflags == oldflags) {
 		*pprev = vma;
 		return 0;
 	}
 
+	bc_recharge = bc_need_memory_recharge(vma, vma->vm_file, newflags);
+	if (bc_recharge == BC_CHARGE) {
+		error = __bc_memory_charge(mm, end - start, BC_BARRIER);
+		if (error < 0)
+			goto fail_charge;
+	}
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
@@ -157,8 +166,9 @@ mprotect_fixup(struct vm_area_struct *vm
 	if (newflags & VM_WRITE) {
 		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
 			charged = nrpages;
+			error = -ENOMEM;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto fail_acct;
 			newflags |= VM_ACCOUNT;
 		}
 	}
@@ -189,6 +199,8 @@ mprotect_fixup(struct vm_area_struct *vm
 	}
 
 success:
+	if (bc_recharge == BC_UNCHARGE)
+		__bc_memory_uncharge(mm, end - start);
 	/*
 	 * vm_flags and vm_page_prot are protected by the mmap_sem
 	 * held in write mode.
@@ -212,6 +224,10 @@ success:
 
 fail:
 	vm_unacct_memory(charged);
+fail_acct:
+	if (bc_recharge == BC_CHARGE)
+		__bc_memory_uncharge(mm, end - start);
+fail_charge:
 	return error;
 }
 /*
--- ./mm/mremap.c.bc_userpages	2006-10-05 11:42:43.000000000 +0400
+++ ./mm/mremap.c	2006-10-05 12:44:20.000000000 +0400
@@ -19,6 +19,8 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 
+#include <bc/vmpages.h>
+
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -344,10 +346,16 @@ unsigned long do_mremap(unsigned long ad
 		goto out;
 	}
 
+	if (bc_memory_charge(mm, new_len - old_len,
+				vma->vm_file, vma->vm_flags, BC_BARRIER)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	if (vma->vm_flags & VM_ACCOUNT) {
 		charged = (new_len - old_len) >> PAGE_SHIFT;
 		if (security_vm_enough_memory(charged))
-			goto out_nc;
+			goto out_bc;
 	}
 
 	/* old_len exactly to the end of the area..
@@ -374,7 +382,7 @@ unsigned long do_mremap(unsigned long ad
 						   addr + new_len);
 			}
 			ret = addr;
-			goto out;
+			goto out_nc;
 		}
 	}
 
@@ -393,14 +401,18 @@ unsigned long do_mremap(unsigned long ad
 						vma->vm_pgoff, map_flags);
 			ret = new_addr;
 			if (new_addr & ~PAGE_MASK)
-				goto out;
+				goto out_nc;
 		}
 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
 	}
-out:
+out_nc:
 	if (ret & ~PAGE_MASK)
 		vm_unacct_memory(charged);
-out_nc:
+out_bc:
+	if (ret & ~PAGE_MASK)
+		bc_memory_uncharge(mm, new_len - old_len,
+				vma->vm_file, vma->vm_flags);
+out:
 	return ret;
 }
 
--- ./mm/rmap.c.bc_userpages	2006-10-05 12:14:35.000000000 +0400
+++ ./mm/rmap.c	2006-10-05 12:44:20.000000000 +0400
@@ -54,6 +54,8 @@
 #include <linux/rcupdate.h>
 #include <linux/module.h>
 
+#include <bc/rsspages.h>
+
 #include <asm/tlbflush.h>
 
 struct kmem_cache *anon_vma_cachep;
@@ -586,6 +588,7 @@ void page_remove_rmap(struct page *page)
 		}
 #endif
 		BUG_ON(page_mapcount(page) < 0);
+		bc_rsspage_uncharge(page);
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
 		 * but that might overwrite a racing page_add_anon_rmap
--- ./mm/shmem.c.bc_userpages	2006-10-05 12:14:35.000000000 +0400
+++ ./mm/shmem.c	2006-10-05 12:44:20.000000000 +0400
@@ -49,6 +49,8 @@
 #include <linux/migrate.h>
 #include <linux/highmem.h>
 
+#include <bc/vmpages.h>
+
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/pgtable.h>
@@ -2511,6 +2513,19 @@ int shmem_zero_setup(struct vm_area_stru
 
 	if (vma->vm_file)
 		fput(vma->vm_file);
+	else {
+		switch (bc_need_memory_recharge(vma, file, vma->vm_flags)) {
+		case BC_UNCHARGE:
+			__bc_memory_uncharge(vma->vm_mm,
+					vma->vm_end - vma->vm_start);
+			break;
+		case BC_CHARGE:
+			if (__bc_memory_charge(vma->vm_mm,
+						vma->vm_end - vma->vm_start,
+						BC_BARRIER))
+				return -ENOMEM;
+		}
+	}
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/