lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170904142108.7165-4-guro@fb.com>
Date:   Mon, 4 Sep 2017 15:21:06 +0100
From:   Roman Gushchin <guro@...com>
To:     <linux-mm@...ck.org>
CC:     Roman Gushchin <guro@...com>, Michal Hocko <mhocko@...nel.org>,
        Vladimir Davydov <vdavydov.dev@...il.com>,
        Johannes Weiner <hannes@...xchg.org>,
        David Rientjes <rientjes@...gle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Tejun Heo <tj@...nel.org>,
        Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>,
        <kernel-team@...com>, <cgroups@...r.kernel.org>,
        <linux-doc@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: [v7 3/5] mm, oom: introduce oom_priority for memory cgroups

Introduce a per-memory-cgroup oom_priority setting: an integer number,
which defines the order in which the OOM killer selects victim memory
cgroups.

The OOM killer prefers memory cgroups with a larger oom_priority value,
provided they are populated with eligible tasks.

The oom_priority value is compared only among sibling cgroups.

If two or more sibling cgroups have the same oom_priority,
the decision is based on their memory footprint.

The root cgroup has an oom_priority of 0, which cannot be changed.

Signed-off-by: Roman Gushchin <guro@...com>
Cc: Michal Hocko <mhocko@...nel.org>
Cc: Vladimir Davydov <vdavydov.dev@...il.com>
Cc: Johannes Weiner <hannes@...xchg.org>
Cc: David Rientjes <rientjes@...gle.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Tejun Heo <tj@...nel.org>
Cc: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
Cc: kernel-team@...com
Cc: cgroups@...r.kernel.org
Cc: linux-doc@...r.kernel.org
Cc: linux-kernel@...r.kernel.org
Cc: linux-mm@...ck.org
---
 include/linux/memcontrol.h |  3 +++
 mm/memcontrol.c            | 49 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5b5c2b89968e..73a0291948fd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -206,6 +206,9 @@ struct mem_cgroup {
 	/* cached OOM score */
 	long oom_score;
 
+	/* OOM killer priority */
+	int oom_priority;
+
 	/* handle for "memory.events" */
 	struct cgroup_file events_file;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 97813c56163b..d7dd293897ca 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2757,6 +2757,7 @@ static void select_victim_memcg(struct mem_cgroup *root, struct oom_control *oc)
 	for (;;) {
 		struct cgroup_subsys_state *css;
 		struct mem_cgroup *memcg = NULL;
+		int prio = INT_MIN;
 		long score = LONG_MIN;
 
 		css_for_each_child(css, &root->css) {
@@ -2768,7 +2769,12 @@ static void select_victim_memcg(struct mem_cgroup *root, struct oom_control *oc)
 			if (iter->oom_score == 0)
 				continue;
 
-			if (iter->oom_score > score) {
+			if (iter->oom_priority > prio) {
+				memcg = iter;
+				prio = iter->oom_priority;
+				score = iter->oom_score;
+			} else if (iter->oom_priority == prio &&
+				   iter->oom_score > score) {
 				memcg = iter;
 				score = iter->oom_score;
 			}
@@ -2838,7 +2844,15 @@ bool mem_cgroup_select_oom_victim(struct oom_control *oc)
 	 * For system-wide OOMs we should consider tasks in the root cgroup
 	 * with oom_score larger than oc->chosen_points.
 	 */
-	if (!oc->memcg) {
+	if (!oc->memcg && !(oc->chosen_memcg &&
+			    oc->chosen_memcg->oom_priority > 0)) {
+		/*
+		 * Root memcg has priority 0, so if chosen memcg has lower
+		 * priority, any task in root cgroup is preferable.
+		 */
+		if (oc->chosen_memcg && oc->chosen_memcg->oom_priority < 0)
+			oc->chosen_points = 0;
+
 		select_victim_root_cgroup_task(oc);
 
 		if (oc->chosen_task && oc->chosen_memcg) {
@@ -5480,6 +5494,31 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+static int memory_oom_priority_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->oom_priority);
+
+	return 0;
+}
+
+static ssize_t memory_oom_priority_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int oom_priority;
+	int err;
+
+	err = kstrtoint(strstrip(buf), 0, &oom_priority);
+	if (err)
+		return err;
+
+	memcg->oom_priority = oom_priority;
+
+	return nbytes;
+}
+
 static int memory_events_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5606,6 +5645,12 @@ static struct cftype memory_files[] = {
 		.write = memory_oom_group_write,
 	},
 	{
+		.name = "oom_priority",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_oom_priority_show,
+		.write = memory_oom_priority_write,
+	},
+	{
 		.name = "events",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.file_offset = offsetof(struct mem_cgroup, events_file),
-- 
2.13.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ