Date:   Mon, 17 Aug 2020 10:08:25 -0400
From:   Waiman Long <longman@...hat.com>
To:     Andrew Morton <akpm@...ux-foundation.org>,
        Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...nel.org>,
        Vladimir Davydov <vdavydov.dev@...il.com>,
        Jonathan Corbet <corbet@....net>,
        Alexey Dobriyan <adobriyan@...il.com>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>
Cc:     linux-kernel@...r.kernel.org, linux-doc@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, cgroups@...r.kernel.org,
        linux-mm@...ck.org, Waiman Long <longman@...hat.com>
Subject: [RFC PATCH 2/8] memcg, mm: Return ENOMEM or delay if memcg_over_limit

The brk(), mmap(), mlock(), mlockall() and mprotect() syscalls are
modified to check the memcg_over_limit flag and return -ENOMEM when it
is set and the memory control action is PR_MEMACT_ENOMEM.

If the action is PR_MEMACT_SLOWDOWN instead, an artificial delay of
20ms is added to slow down these memory allocation syscalls.

Signed-off-by: Waiman Long <longman@...hat.com>
---
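Not part of the patch: a minimal userspace sketch of the behaviour the
changelog describes, assuming the calling task has already selected
PR_MEMACT_ENOMEM or PR_MEMACT_SLOWDOWN through the prctl() interface
introduced earlier in this series and is running in a memory cgroup
that is over its limit:

  #include <errno.h>
  #include <stdio.h>
  #include <sys/mman.h>
  #include <time.h>

  int main(void)
  {
  	struct timespec t0, t1;
  	void *p;

  	clock_gettime(CLOCK_MONOTONIC, &t0);
  	p = mmap(NULL, 64UL << 20, PROT_READ | PROT_WRITE,
  		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  	clock_gettime(CLOCK_MONOTONIC, &t1);

  	if (p == MAP_FAILED) {
  		/* PR_MEMACT_ENOMEM: the syscall fails early. */
  		fprintf(stderr, "mmap: %s\n", errno == ENOMEM ?
  			"over memcg limit" : "unexpected error");
  		return 1;
  	}

  	/*
  	 * PR_MEMACT_SLOWDOWN: the call succeeds, but takes roughly
  	 * an extra 20ms while the task remains over its limit.
  	 */
  	printf("mmap took %.1f ms\n",
  	       (t1.tv_sec - t0.tv_sec) * 1000.0 +
  	       (t1.tv_nsec - t0.tv_nsec) / 1e6);
  	munmap(p, 64UL << 20);
  	return 0;
  }

With PR_MEMACT_ENOMEM the mmap() above is expected to fail at the new
check in ksys_mmap_pgoff()/do_mmap(); with PR_MEMACT_SLOWDOWN the
reported time should grow by about the msleep(20) delay.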
 include/linux/sched.h | 16 ++++++++++++++++
 kernel/fork.c         |  1 +
 mm/memcontrol.c       | 25 +++++++++++++++++++++++--
 mm/mlock.c            |  6 ++++++
 mm/mmap.c             | 12 ++++++++++++
 mm/mprotect.c         |  3 +++
 6 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c79d606d27ab..9ec1bd072334 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,6 +1477,22 @@ static inline char task_state_to_char(struct task_struct *tsk)
 	return task_index_to_char(task_state_index(tsk));
 }
 
+#ifdef CONFIG_MEMCG
+extern bool mem_cgroup_check_over_limit(void);
+
+static inline bool mem_over_memcg_limit(void)
+{
+	if (READ_ONCE(current->memcg_over_limit))
+		return mem_cgroup_check_over_limit();
+	return false;
+}
+#else
+static inline bool mem_over_memcg_limit(void)
+{
+	return false;
+}
+#endif
+
 /**
  * is_global_init - check if a task structure is init. Since init
  * is free to have sub-threads we need to check tgid.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..61f9a9e5f857 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -940,6 +940,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
+	tsk->memcg_over_limit = false;
 #endif
 	return tsk;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1106dac024ac..5cad7bb26d13 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2646,7 +2646,9 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 	if (!mm)
 		return true;	/* No more check is needed */
 
-	current->memcg_over_limit = false;
+	if (READ_ONCE(current->memcg_over_limit))
+		WRITE_ONCE(current->memcg_over_limit, false);
+
 	if ((action == PR_MEMACT_SIGNAL) && !signal)
 		goto out;
 
@@ -2660,7 +2662,11 @@ static bool __mem_cgroup_over_high_action(struct mem_cgroup *memcg, u8 action)
 		WRITE_ONCE(current->memcg_over_limit, true);
 		break;
 	case PR_MEMACT_SLOWDOWN:
-		/* Slow down by yielding the cpu */
+		/*
+		 * Slow down by yielding the cpu & adding delay to
+		 * memory allocation syscalls.
+		 */
+		WRITE_ONCE(current->memcg_over_limit, true);
 		set_tsk_need_resched(current);
 		set_preempt_need_resched();
 		break;
@@ -2694,6 +2700,21 @@ static inline bool mem_cgroup_over_high_action(struct mem_cgroup *memcg)
 	return __mem_cgroup_over_high_action(memcg, action);
 }
 
+/*
+ * Called from memory allocation syscalls.
+ * Return true if ENOMEM should be returned, false otherwise.
+ */
+bool mem_cgroup_check_over_limit(void)
+{
+	u8 action = READ_ONCE(current->memcg_over_high_action);
+
+	if (action == PR_MEMACT_ENOMEM)
+		return true;
+	if (action == PR_MEMACT_SLOWDOWN)
+		msleep(20);	/* Artificial delay of 20ms */
+	return false;
+}
+
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index 93ca2bf30b4f..130d4b3fa0f5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -678,6 +678,9 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	len = PAGE_ALIGN(len + (offset_in_page(start)));
 	start &= PAGE_MASK;
 
@@ -807,6 +810,9 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	if (!can_do_mlock())
 		return -EPERM;
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5f..873ccf2560a6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -198,6 +198,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	bool downgraded = false;
 	LIST_HEAD(uf);
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
@@ -1407,6 +1411,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1557,6 +1565,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 	struct file *file = NULL;
 	unsigned long retval;
 
+	/* Too much memory used? */
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	if (!(flags & MAP_ANONYMOUS)) {
 		audit_mmap_fd(fd, flags);
 		file = fget(fd);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb..b2c0f50bb0a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -519,6 +519,9 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
 				(prot & PROT_READ);
 
+	if (mem_over_memcg_limit())
+		return -ENOMEM;
+
 	start = untagged_addr(start);
 
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
-- 
2.18.1
