[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20151029170422.GH23598@dhcp22.suse.cz>
Date: Thu, 29 Oct 2015 18:04:22 +0100
From: Michal Hocko <mhocko@...nel.org>
To: David Rientjes <rientjes@...gle.com>
Cc: Hongjie Fang (方洪杰)
<Hongjie.Fang@...eadtrum.com>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: 答复: [PATCHv2 4.3-rc6] proc:
fix convert from oom_score_adj to oom_adj
On Wed 28-10-15 16:54:04, David Rientjes wrote:
[...]
> It's a bad situation, I agree, and we anticipated the complete removal of
> /proc/pid/oom_adj years ago since it has been deprecated for years. Maybe
> one day we can convince Linus that is possible, but until then we're stuck
> with it.
Let's do it then.
---
From 71be967d15b3298f3fad7e49ee51f852761b9632 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@...e.com>
Date: Thu, 29 Oct 2015 17:42:22 +0100
Subject: [PATCH] oom: get rid of oom_adj
oom_adj was marked as deprecated by 51b1bd2ace15 ("oom: deprecate
oom_adj tunable"), which was quite some time ago. The knob was even
removed by 01dc52ebdf47 ("oom: remove deprecated oom_adj") but was
then reintroduced by fa0cbbf145aa ("mm, oom: reintroduce
/proc/pid/oom_adj") with a scaling mechanism to map oom_adj to
oom_score_adj. The mapping is not ideal and is quite confusing, as noted by
Hongjie Fang:
$ echo 10 > /proc/1450/oom_adj
$ cat /proc/1450/oom_adj
9
This could be fixed, but it seems that the knob has been deprecated for
long enough to finally get rid of it altogether. Most of the reports
date back to 2012 and 2013, and programs were fixed to either
use oom_score_adj or have a fallback mechanism to use oom_adj on older
kernels.
The time has come to finally get rid of this duality and potential source of
confusion. Let's keep OOM_ADJUST_* and OOM_DISABLE in the user headers so as
not to break compilation of existing code.
Signed-off-by: Michal Hocko <mhocko@...e.com>
---
Documentation/filesystems/proc.txt | 11 +---
fs/proc/base.c | 108 -------------------------------------
2 files changed, 2 insertions(+), 117 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d6f259eaa5ef..762bd3f410e9 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -33,8 +33,7 @@ Table of Contents
2 Modifying System Parameters
3 Per-Process Parameters
- 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer
- score
+ 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer score
3.2 /proc/<pid>/oom_score - Display current oom-killer score
3.3 /proc/<pid>/io - Display the IO accounting fields
3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
@@ -1436,7 +1435,7 @@ of the kernel.
CHAPTER 3: PER-PROCESS PARAMETERS
------------------------------------------------------------------------------
-3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
+3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score
--------------------------------------------------------------------------------
These file can be used to adjust the badness heuristic used to select which
@@ -1477,12 +1476,6 @@ same system, cpuset, mempolicy, or memory controller resources to use at least
equivalent to discounting 50% of the task's allowed memory from being considered
as scoring against the task.
-For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also
-be used to tune the badness score. Its acceptable values range from -16
-(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17
-(OOM_DISABLE) to disable oom killing entirely for that task. Its value is
-scaled linearly with /proc/<pid>/oom_score_adj.
-
The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
requires CAP_SYS_RESOURCE.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60c71b10eaee..f34e43dd8727 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1011,112 +1011,6 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release,
};
-static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
-{
- struct task_struct *task = get_proc_task(file_inode(file));
- char buffer[PROC_NUMBUF];
- int oom_adj = OOM_ADJUST_MIN;
- size_t len;
- unsigned long flags;
-
- if (!task)
- return -ESRCH;
- if (lock_task_sighand(task, &flags)) {
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
- oom_adj = OOM_ADJUST_MAX;
- else
- oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
- OOM_SCORE_ADJ_MAX;
- unlock_task_sighand(task, &flags);
- }
- put_task_struct(task);
- len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
- return simple_read_from_buffer(buf, count, ppos, buffer, len);
-}
-
-static ssize_t oom_adj_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task;
- char buffer[PROC_NUMBUF];
- int oom_adj;
- unsigned long flags;
- int err;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count)) {
- err = -EFAULT;
- goto out;
- }
-
- err = kstrtoint(strstrip(buffer), 0, &oom_adj);
- if (err)
- goto out;
- if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
- oom_adj != OOM_DISABLE) {
- err = -EINVAL;
- goto out;
- }
-
- task = get_proc_task(file_inode(file));
- if (!task) {
- err = -ESRCH;
- goto out;
- }
-
- task_lock(task);
- if (!task->mm) {
- err = -EINVAL;
- goto err_task_lock;
- }
-
- if (!lock_task_sighand(task, &flags)) {
- err = -ESRCH;
- goto err_task_lock;
- }
-
- /*
- * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
- * value is always attainable.
- */
- if (oom_adj == OOM_ADJUST_MAX)
- oom_adj = OOM_SCORE_ADJ_MAX;
- else
- oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
-
- if (oom_adj < task->signal->oom_score_adj &&
- !capable(CAP_SYS_RESOURCE)) {
- err = -EACCES;
- goto err_sighand;
- }
-
- /*
- * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
- * /proc/pid/oom_score_adj instead.
- */
- pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
- current->comm, task_pid_nr(current), task_pid_nr(task),
- task_pid_nr(task));
-
- task->signal->oom_score_adj = oom_adj;
- trace_oom_score_adj_update(task);
-err_sighand:
- unlock_task_sighand(task, &flags);
-err_task_lock:
- task_unlock(task);
- put_task_struct(task);
-out:
- return err < 0 ? err : count;
-}
-
-static const struct file_operations proc_oom_adj_operations = {
- .read = oom_adj_read,
- .write = oom_adj_write,
- .llseek = generic_file_llseek,
-};
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -2813,7 +2707,6 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -3161,7 +3054,6 @@ static const struct pid_entry tid_base_stuff[] = {
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
--
2.6.1
--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists