[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b161acba-2e0b-4d00-9bf1-3930b307653d@redhat.com>
Date: Thu, 13 Nov 2025 09:14:05 -0500
From: Waiman Long <llong@...hat.com>
To: Chen Ridong <chenridong@...weicloud.com>, tj@...nel.org,
hannes@...xchg.org, mkoutny@...e.com
Cc: cgroups@...r.kernel.org, linux-kernel@...r.kernel.org,
lujialin4@...wei.com, chenridong@...wei.com
Subject: Re: [PATCH -next v2] cpuset: Treat cpusets in attaching as populated
On 11/13/25 8:28 AM, Chen Ridong wrote:
> From: Chen Ridong <chenridong@...wei.com>
>
> Currently, the check for whether a partition is populated does not
> account for tasks that are in the process of being attached to the
> cpuset. This is a corner case that can leave a task stuck in a
> partition with no effective CPUs.
>
> The race condition occurs as follows:
>
> cpu0                            cpu1
>                                 //cpuset A with cpu N
> migrate task p to A
> cpuset_can_attach
> // with effective cpus
> // check ok
>
> // cpuset_mutex is not held     // clear cpuset.cpus.exclusive
>                                 // making effective cpus empty
>                                 update_exclusive_cpumask
>                                 // tasks_nocpu_error check ok
>                                 // empty effective cpus, partition valid
> cpuset_attach
> ...
> // task p stays in A, with non-effective cpus.
>
> To fix this issue, this patch introduces cs_is_populated(), which also
> treats a cpuset with an attach in progress as populated. This new helper
> is used in validate_change() and partition_is_populated().
>
> Fixes: e2d59900d936 ("cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective")
> Signed-off-by: Chen Ridong <chenridong@...wei.com>
> ---
> kernel/cgroup/cpuset.c | 31 +++++++++++++++++++++++--------
> 1 file changed, 23 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index daf813386260..bd273b1e09b0 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -356,6 +356,15 @@ static inline bool is_in_v2_mode(void)
> (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
> }
>
> +static inline bool cs_is_populated(struct cpuset *cs)
Could you name it "cpuset_is_populated()", since it is a cpuset-specific
version of cgroup_is_populated()?
> +{
> + lockdep_assert_held(&cpuset_mutex);
> +
> + /* Cpusets in the process of attaching should be considered as populated */
> + return cgroup_is_populated(cs->css.cgroup) ||
> + cs->attach_in_progress;
> +}
> +
> /**
> * partition_is_populated - check if partition has tasks
> * @cs: partition root to be checked
> @@ -373,19 +382,25 @@ static inline bool is_in_v2_mode(void)
> static inline bool partition_is_populated(struct cpuset *cs,
> struct cpuset *excluded_child)
> {
> - struct cgroup_subsys_state *css;
> - struct cpuset *child;
> + struct cpuset *cp;
> + struct cgroup_subsys_state *pos_css;
>
> - if (cs->css.cgroup->nr_populated_csets)
> + /*
> + * We cannot call cs_is_populated(cs) directly, as
> + * nr_populated_domain_children may include populated
> + * csets from descendants that are partitions.
> + */
> + if (cs->css.cgroup->nr_populated_csets ||
> + cs->attach_in_progress)
> return true;
>
> rcu_read_lock();
> - cpuset_for_each_child(child, css, cs) {
> - if (child == excluded_child)
> + cpuset_for_each_descendant_pre(cp, pos_css, cs) {
> + if (cp == cs || cp == excluded_child)
> continue;
> - if (is_partition_valid(child))
> + if (is_partition_valid(cp))
You should add "pos_css = css_rightmost_descendant(pos_css);" to skip
the whole subtree.

Cheers,
Longman
> continue;
> - if (cgroup_is_populated(child->css.cgroup)) {
> + if (cs_is_populated(cp)) {
> rcu_read_unlock();
> return true;
> }
> @@ -670,7 +685,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
> * be changed to have empty cpus_allowed or mems_allowed.
> */
> ret = -ENOSPC;
> - if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
> + if (cs_is_populated(cur)) {
> if (!cpumask_empty(cur->cpus_allowed) &&
> cpumask_empty(trial->cpus_allowed))
> goto out;
Powered by blists - more mailing lists