lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20080316173005.8812.88290.sendpatchset@localhost.localdomain>
Date:	Sun, 16 Mar 2008 23:00:05 +0530
From:	Balbir Singh <balbir@...ux.vnet.ibm.com>
To:	linux-mm@...ck.org
Cc:	Hugh Dickins <hugh@...itas.com>,
	Sudhir Kumar <skumar@...ux.vnet.ibm.com>,
	YAMAMOTO Takashi <yamamoto@...inux.co.jp>,
	Paul Menage <menage@...gle.com>, lizf@...fujitsu.com,
	linux-kernel@...r.kernel.org, taka@...inux.co.jp,
	David Rientjes <rientjes@...gle.com>,
	Pavel Emelianov <xemul@...nvz.org>,
	Balbir Singh <balbir@...ux.vnet.ibm.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
Subject: [RFC][2/3] Account and control virtual address space allocations



This patch implements accounting and control of virtual address space.
Accounting is done whenever the virtual address space of any task/mm_struct
belonging to the cgroup grows or shrinks. If an expansion would take the
cgroup over its limit, the expansion fails. A new function,
mem_cgroup_update_as(), is added to handle accounting of the virtual
address space usage of cgroups.

TODOs

1. IA64 has code in perfmon.c pfm_smpl_buffer_alloc(), which increments
   the total_vm of the mm_struct. This code has not yet been brought under
   virtual address space control.
2. Virtual address space control is enabled only when CONFIG_MMU is
   enabled. Should we do this for the nommu case as well? My suspicion is
   that we don't have to.

Signed-off-by: Balbir Singh <balbir@...ux.vnet.ibm.com>
---

 arch/x86/kernel/ptrace.c   |   10 +++++++++-
 include/linux/memcontrol.h |    7 +++++++
 init/Kconfig               |    4 +++-
 kernel/fork.c              |    9 +++++++--
 mm/memcontrol.c            |   37 +++++++++++++++++++++++++++++++++++++
 mm/memory.c                |    5 +++++
 mm/mmap.c                  |   22 ++++++++++++++++++++--
 mm/mremap.c                |   21 ++++++++++++++++++---
 8 files changed, 106 insertions(+), 9 deletions(-)

diff -puN mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control mm/memcontrol.c
--- linux-2.6.25-rc5/mm/memcontrol.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memcontrol.c	2008-03-16 22:57:40.000000000 +0530
@@ -525,6 +525,32 @@ unsigned long mem_cgroup_isolate_pages(u
 }
 
 /*
+ * Charge (nr_pages > 0) or uncharge (nr_pages <= 0) the address space usage
+ * of the cgroup that owns mm. Returns 0 on success, 1 if the charge fails.
+ */
+int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+	int ret = 0;
+	struct mem_cgroup *mem;
+	if (mem_cgroup_subsys.disabled)
+		return ret;
+
+	rcu_read_lock();
+	mem = rcu_dereference(mm->mem_cgroup);
+	css_get(&mem->css);
+	rcu_read_unlock();
+
+	if (nr_pages > 0) {
+		if (res_counter_charge(&mem->as_res, (nr_pages * PAGE_SIZE)))
+			ret = 1;
+	} else
+		res_counter_uncharge(&mem->as_res, (-nr_pages * PAGE_SIZE));
+
+	css_put(&mem->css);
+	return ret;
+}
+
+/*
  * Charge the memory controller for page usage.
  * Return
  * 0 if the charge was successful
@@ -1103,6 +1129,17 @@ static void mem_cgroup_move_task(struct 
 		goto out;
 
 	css_get(&mem->css);
+	/*
+	 * For address space accounting, the charges are migrated.
+	 * We need to migrate them since all future charges/uncharges will
+	 * now happen against the new cgroup. For consistency, we need to
+	 * migrate all charges, otherwise we could end up dropping charges
+	 * from the new cgroup (even though they were incurred in the current
+	 * group).
+	 */
+	if (res_counter_charge(&mem->as_res, mm->total_vm))
+		goto out;
+	res_counter_uncharge(&old_mem->as_res, mm->total_vm);
 	rcu_assign_pointer(mm->mem_cgroup, mem);
 	css_put(&old_mem->css);
 
diff -puN include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control include/linux/memcontrol.h
--- linux-2.6.25-rc5/include/linux/memcontrol.h~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/include/linux/memcontrol.h	2008-03-16 22:57:40.000000000 +0530
@@ -54,6 +54,7 @@ int task_in_mem_cgroup(struct task_struc
 extern int mem_cgroup_prepare_migration(struct page *page);
 extern void mem_cgroup_end_migration(struct page *page);
 extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+extern int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages);
 
 /*
  * For memory reclaim.
@@ -172,6 +173,12 @@ static inline long mem_cgroup_calc_recla
 {
 	return 0;
 }
+
+static inline int mem_cgroup_update_as(struct mm_struct *mm, long nr_pages)
+{
+	return 0;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff -puN mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control mm/mmap.c
--- linux-2.6.25-rc5/mm/mmap.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mmap.c	2008-03-16 22:57:40.000000000 +0530
@@ -1117,6 +1117,9 @@ munmap_back:
 		}
 	}
 
+	if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -1226,8 +1229,11 @@ unmap_and_free_vma:
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
-	if (charged)
+	if (charged) {
+		mem_cgroup_update_as(mm, -charged);
 		vm_unacct_memory(charged);
+	}
+unacct_as_error:
 	return error;
 }
 
@@ -1555,6 +1561,9 @@ static int acct_stack_growth(struct vm_a
 	if (security_vm_enough_memory(grow))
 		return -ENOMEM;
 
+	if (mem_cgroup_update_as(mm, grow))
+		return -ENOMEM;
+
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
@@ -2003,9 +2012,14 @@ unsigned long do_brk(unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (security_vm_enough_memory(len >> PAGE_SHIFT))
+	if (mem_cgroup_update_as(mm, (len >> PAGE_SHIFT)))
 		return -ENOMEM;
 
+	if (security_vm_enough_memory(len >> PAGE_SHIFT)) {
+		mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
+		return -ENOMEM;
+	}
+
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
 					NULL, NULL, pgoff, NULL))
@@ -2236,6 +2250,9 @@ int install_special_mapping(struct mm_st
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
+	if (mem_cgroup_update_as(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2248,6 +2265,7 @@ int install_special_mapping(struct mm_st
 
 	if (unlikely(insert_vm_struct(mm, vma))) {
 		kmem_cache_free(vm_area_cachep, vma);
+		mem_cgroup_update_as(mm, -(len >> PAGE_SHIFT));
 		return -ENOMEM;
 	}
 
diff -puN arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control arch/x86/kernel/ptrace.c
--- linux-2.6.25-rc5/arch/x86/kernel/ptrace.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/arch/x86/kernel/ptrace.c	2008-03-16 22:57:40.000000000 +0530
@@ -20,6 +20,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -787,6 +788,8 @@ static int ptrace_bts_realloc(struct tas
 	current->mm->total_vm  -= old_size;
 	current->mm->locked_vm -= old_size;
 
+	mem_cgroup_update_as(current->mm, -old_size);
+
 	if (size == 0)
 		goto out;
 
@@ -816,10 +819,15 @@ static int ptrace_bts_realloc(struct tas
 			goto out;
 	}
 
+	if (mem_cgroup_update_as(current->mm, size))
+		goto out;
+
 	ret = ds_allocate((void **)&child->thread.ds_area_msr,
 			  size << PAGE_SHIFT);
-	if (ret < 0)
+	if (ret < 0) {
+		mem_cgroup_update_as(current->mm, -size);
 		goto out;
+	}
 
 	current->mm->total_vm  += size;
 	current->mm->locked_vm += size;
diff -puN kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control kernel/fork.c
--- linux-2.6.25-rc5/kernel/fork.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/kernel/fork.c	2008-03-16 22:57:40.000000000 +0530
@@ -53,6 +53,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -237,6 +238,7 @@ static int dup_mmap(struct mm_struct *mm
 
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
+		unsigned int len = vma_pages(mpnt);
 
 		if (mpnt->vm_flags & VM_DONTCOPY) {
 			long pages = vma_pages(mpnt);
@@ -247,11 +249,12 @@ static int dup_mmap(struct mm_struct *mm
 		}
 		charge = 0;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
-			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(len))
 				goto fail_nomem;
 			charge = len;
 		}
+		if (mem_cgroup_update_as(mm, len))
+			goto fail_nomem_as;
 		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
@@ -311,8 +314,10 @@ out:
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
-	retval = -ENOMEM;
+	mem_cgroup_update_as(mm, -charge);
 	vm_unacct_memory(charge);
+fail_nomem_as:
+	retval = -ENOMEM;
 	goto out;
 }
 
diff -puN mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control mm/mremap.c
--- linux-2.6.25-rc5/mm/mremap.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/mremap.c	2008-03-16 22:57:40.000000000 +0530
@@ -174,10 +174,15 @@ static unsigned long move_vma(struct vm_
 	if (mm->map_count >= sysctl_max_map_count - 3)
 		return -ENOMEM;
 
+	if (mem_cgroup_update_as(mm, new_len >> PAGE_SHIFT))
+		return -ENOMEM;
+
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
-	if (!new_vma)
+	if (!new_vma) {
+		mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
 		return -ENOMEM;
+	}
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
@@ -187,6 +192,7 @@ static unsigned long move_vma(struct vm_
 		 * and then proceed to unmap new area instead of old.
 		 */
 		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+		mem_cgroup_update_as(mm, -(new_len >> PAGE_SHIFT));
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
@@ -347,10 +353,17 @@ unsigned long do_mremap(unsigned long ad
 		goto out;
 	}
 
+	if (mem_cgroup_update_as(mm, (new_len - old_len) >> PAGE_SHIFT)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	if (vma->vm_flags & VM_ACCOUNT) {
 		charged = (new_len - old_len) >> PAGE_SHIFT;
-		if (security_vm_enough_memory(charged))
+		if (security_vm_enough_memory(charged)) {
+			mem_cgroup_update_as(mm, -charged);
 			goto out_nc;
+		}
 	}
 
 	/* old_len exactly to the end of the area..
@@ -406,8 +419,10 @@ unsigned long do_mremap(unsigned long ad
 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
 	}
 out:
-	if (ret & ~PAGE_MASK)
+	if (ret & ~PAGE_MASK) {
 		vm_unacct_memory(charged);
+		mem_cgroup_update_as(mm, -charged);
+	}
 out_nc:
 	return ret;
 }
diff -puN init/Kconfig~memory-controller-virtual-address-space-accounting-and-control init/Kconfig
--- linux-2.6.25-rc5/init/Kconfig~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/init/Kconfig	2008-03-16 22:57:40.000000000 +0530
@@ -369,7 +369,9 @@ config CGROUP_MEM_RES_CTLR
 	depends on CGROUPS && RESOURCE_COUNTERS
 	help
 	  Provides a memory resource controller that manages both page cache and
-	  RSS memory.
+	  RSS memory. It also provides accounting and control of address
+	  space allocations (along the lines of RLIMIT_AS) for cgroups
+	  when CONFIG_MMU is enabled.
 
 	  Note that setting this option increases fixed memory overhead
 	  associated with each page of memory in the system by 4/8 bytes
diff -puN mm/swapfile.c~memory-controller-virtual-address-space-accounting-and-control mm/swapfile.c
diff -puN mm/memory.c~memory-controller-virtual-address-space-accounting-and-control mm/memory.c
--- linux-2.6.25-rc5/mm/memory.c~memory-controller-virtual-address-space-accounting-and-control	2008-03-16 22:57:40.000000000 +0530
+++ linux-2.6.25-rc5-balbir/mm/memory.c	2008-03-16 22:57:40.000000000 +0530
@@ -838,6 +838,11 @@ unsigned long unmap_vmas(struct mmu_gath
 
 		if (vma->vm_flags & VM_ACCOUNT)
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
+		/*
+		 * Unaccount used virtual memory for cgroups
+		 */
+		mem_cgroup_update_as(vma->vm_mm,
+					((long)(start - end)) >> PAGE_SHIFT);
 
 		while (start != end) {
 			if (!tlb_start_valid) {
_

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ