[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <88916bbe-1de6-4845-acb2-624668581fa2@redhat.com>
Date: Thu, 29 Feb 2024 22:11:46 -0500
From: Waiman Long <longman@...hat.com>
To: 许春光 <brookxu.cn@...il.com>
Cc: lizefan.x@...edance.com, tj@...nel.org, hannes@...xchg.org,
linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH] cpuset: remove /proc/PID/cpuset
On 2/29/24 21:43, 许春光 wrote:
> Hi waiman:
>
> Thanks for analyse in detail, we found some guy use /proc/PID/cpuset
> to get the path and cause a live issue.
Just remember that in cgroup v2, /proc/PID/cpuset may not give you the
actual cgroup the process is in. It just give you the cgroup where the
corresponding cpuset setting will apply to that "PID" process.
Cheers,
Longman
>
> Thanks.
>
> Waiman Long <longman@...hat.com> 于2024年2月10日周六 12:34写道:
>> On 2/2/24 05:49, brookxu.cn wrote:
>>> From: Chunguang Xu <chunguang.xu@...pee.com>
>>>
>>> Now we can get all cgroup paths from /proc/PID/cgroup for
>>> a long time, so it maybe useless to keep /proc/PID/cpuset,
>>> besides the path get from /proc/PID/cpuset is not consistent
>>> with /proc/PID/cgroup in default hierarchy, so now we may
>>> can safely remove /proc/PID/cpuset to avoid the mismatch.
>> With cgroup v2, the value of /proc/PID/cgroup may not match
>> /proc/PID/cpuset. That is the expected behavior due to the way cgroup v2
>> works. For example,
>>
>> Root [CS] --> A [CS] --> B --> C where [CS] means cpuset controller is
>> enabled at that cgroup.
>>
>> If a process P is in cgroup C, /proc/PID/cgroup will show /A/B/C.
>> However, /proc/PID/cpuset will show /A because it is where the cpuset
>> controller setting will apply to process P. Also removing an existing
>> procfs file may break an existing application that relies on it. So we
>> don't do that unless there is a strong reason to do so.
>>
>> NAK
>>
>>> root@...t:~# cat /proc/1186/cgroup
>>> 0::/system.slice/lmeter.service
>>>
>>> root@...t:~# cat /proc/1186/cpuset
>>> /system.slice
>>>
>>> Signed-off-by: Chunguang Xu <chunguang.xu@...pee.com>
>>> ---
>>> arch/mips/configs/sb1250_swarm_defconfig | 1 -
>>> arch/sh/configs/sdk7786_defconfig | 1 -
>>> arch/sh/configs/urquell_defconfig | 1 -
>>> fs/proc/base.c | 6 ----
>>> kernel/cgroup/cpuset.c | 40 ------------------------
>>> 5 files changed, 49 deletions(-)
>>>
>>> diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
>>> index ce855b644bb0..2fbea9c604df 100644
>>> --- a/arch/mips/configs/sb1250_swarm_defconfig
>>> +++ b/arch/mips/configs/sb1250_swarm_defconfig
>>> @@ -3,7 +3,6 @@ CONFIG_HIGH_RES_TIMERS=y
>>> CONFIG_LOG_BUF_SHIFT=15
>>> CONFIG_CGROUPS=y
>>> CONFIG_CPUSETS=y
>>> -# CONFIG_PROC_PID_CPUSET is not set
>>> CONFIG_CGROUP_CPUACCT=y
>>> CONFIG_NAMESPACES=y
>>> CONFIG_RELAY=y
>>> diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
>>> index 7b427c17fbfe..6fe340d4860c 100644
>>> --- a/arch/sh/configs/sdk7786_defconfig
>>> +++ b/arch/sh/configs/sdk7786_defconfig
>>> @@ -13,7 +13,6 @@ CONFIG_CGROUP_DEBUG=y
>>> CONFIG_CGROUP_FREEZER=y
>>> CONFIG_CGROUP_DEVICE=y
>>> CONFIG_CPUSETS=y
>>> -# CONFIG_PROC_PID_CPUSET is not set
>>> CONFIG_CGROUP_CPUACCT=y
>>> CONFIG_CGROUP_MEMCG=y
>>> CONFIG_CGROUP_SCHED=y
>>> diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
>>> index 445bb451a5ec..c960221a0549 100644
>>> --- a/arch/sh/configs/urquell_defconfig
>>> +++ b/arch/sh/configs/urquell_defconfig
>>> @@ -11,7 +11,6 @@ CONFIG_CGROUP_DEBUG=y
>>> CONFIG_CGROUP_FREEZER=y
>>> CONFIG_CGROUP_DEVICE=y
>>> CONFIG_CPUSETS=y
>>> -# CONFIG_PROC_PID_CPUSET is not set
>>> CONFIG_CGROUP_CPUACCT=y
>>> CONFIG_CGROUP_MEMCG=y
>>> CONFIG_CGROUP_SCHED=y
>>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>>> index 98a031ac2648..8dcd23b9212a 100644
>>> --- a/fs/proc/base.c
>>> +++ b/fs/proc/base.c
>>> @@ -3309,9 +3309,6 @@ static const struct pid_entry tgid_base_stuff[] = {
>>> #ifdef CONFIG_LATENCYTOP
>>> REG("latency", S_IRUGO, proc_lstats_operations),
>>> #endif
>>> -#ifdef CONFIG_PROC_PID_CPUSET
>>> - ONE("cpuset", S_IRUGO, proc_cpuset_show),
>>> -#endif
>>> #ifdef CONFIG_CGROUPS
>>> ONE("cgroup", S_IRUGO, proc_cgroup_show),
>>> #endif
>>> @@ -3658,9 +3655,6 @@ static const struct pid_entry tid_base_stuff[] = {
>>> #ifdef CONFIG_LATENCYTOP
>>> REG("latency", S_IRUGO, proc_lstats_operations),
>>> #endif
>>> -#ifdef CONFIG_PROC_PID_CPUSET
>>> - ONE("cpuset", S_IRUGO, proc_cpuset_show),
>>> -#endif
>>> #ifdef CONFIG_CGROUPS
>>> ONE("cgroup", S_IRUGO, proc_cgroup_show),
>>> #endif
>>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>>> index ba36c073304a..908cd7e6efa8 100644
>>> --- a/kernel/cgroup/cpuset.c
>>> +++ b/kernel/cgroup/cpuset.c
>>> @@ -5066,46 +5066,6 @@ void __cpuset_memory_pressure_bump(void)
>>> rcu_read_unlock();
>>> }
>>>
>>> -#ifdef CONFIG_PROC_PID_CPUSET
>>> -/*
>>> - * proc_cpuset_show()
>>> - * - Print tasks cpuset path into seq_file.
>>> - * - Used for /proc/<pid>/cpuset.
>>> - * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
>>> - * doesn't really matter if tsk->cpuset changes after we read it,
>>> - * and we take cpuset_mutex, keeping cpuset_attach() from changing it
>>> - * anyway.
>>> - */
>>> -int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
>>> - struct pid *pid, struct task_struct *tsk)
>>> -{
>>> - char *buf;
>>> - struct cgroup_subsys_state *css;
>>> - int retval;
>>> -
>>> - retval = -ENOMEM;
>>> - buf = kmalloc(PATH_MAX, GFP_KERNEL);
>>> - if (!buf)
>>> - goto out;
>>> -
>>> - css = task_get_css(tsk, cpuset_cgrp_id);
>>> - retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
>>> - current->nsproxy->cgroup_ns);
>>> - css_put(css);
>>> - if (retval == -E2BIG)
>>> - retval = -ENAMETOOLONG;
>>> - if (retval < 0)
>>> - goto out_free;
>>> - seq_puts(m, buf);
>>> - seq_putc(m, '\n');
>>> - retval = 0;
>>> -out_free:
>>> - kfree(buf);
>>> -out:
>>> - return retval;
>>> -}
>>> -#endif /* CONFIG_PROC_PID_CPUSET */
>>> -
>>> /* Display task mems_allowed in /proc/<pid>/status file. */
>>> void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
>>> {
Powered by blists - more mailing lists