[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6812a73c-ace6-447a-9f92-4bc8783b3ed5@huaweicloud.com>
Date: Mon, 12 Jan 2026 10:29:26 +0800
From: Chen Ridong <chenridong@...weicloud.com>
To: Waiman Long <longman@...hat.com>, Tejun Heo <tj@...nel.org>,
Johannes Weiner <hannes@...xchg.org>, Michal Koutný
<mkoutny@...e.com>, Jonathan Corbet <corbet@....net>,
Shuah Khan <shuah@...nel.org>
Cc: linux-kernel@...r.kernel.org, cgroups@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-doc@...r.kernel.org,
Sun Shaojie <sunshaojie@...inos.cn>
Subject: Re: [PATCH cgroup/for-6.20 v3 5/5] cgroup/cpuset: Move the v1 empty
cpus/mems check to cpuset1_validate_change()
On 2026/1/10 9:32, Waiman Long wrote:
> As stated in commit 1c09b195d37f ("cpuset: fix a regression in validating
> config change"), it is not allowed to clear masks of a cpuset if
> there're tasks in it. This is specific to v1 since empty "cpuset.cpus"
> or "cpuset.mems" will cause the v2 cpuset to inherit the effective CPUs
> or memory nodes from its parent. So it is OK to have empty cpus or mems
> even if there are tasks in the cpuset.
>
> Move this empty cpus/mems check in validate_change() to
> cpuset1_validate_change() to allow more flexibility in setting
> cpus or mems in v2. cpuset_is_populated() needs to be moved into
> cpuset-internal.h as it is needed by the empty cpus/mems checking code.
>
> Also add a test case to test_cpuset_prs.sh to verify that.
>
> Reported-by: Chen Ridong <chenridong@...weicloud.com>
> Closes: https://lore.kernel.org/lkml/7a3ec392-2e86-4693-aa9f-1e668a668b9c@huaweicloud.com/
> Signed-off-by: Waiman Long <longman@...hat.com>
> ---
> kernel/cgroup/cpuset-internal.h | 9 ++++++++
> kernel/cgroup/cpuset-v1.c | 14 +++++++++++
> kernel/cgroup/cpuset.c | 23 -------------------
> .../selftests/cgroup/test_cpuset_prs.sh | 3 +++
> 4 files changed, 26 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
> index e8e2683cb067..fd7d19842ded 100644
> --- a/kernel/cgroup/cpuset-internal.h
> +++ b/kernel/cgroup/cpuset-internal.h
> @@ -260,6 +260,15 @@ static inline int nr_cpusets(void)
> return static_key_count(&cpusets_enabled_key.key) + 1;
> }
>
> +static inline bool cpuset_is_populated(struct cpuset *cs)
> +{
> + lockdep_assert_cpuset_lock_held();
> +
> + /* Cpusets in the process of attaching should be considered as populated */
> + return cgroup_is_populated(cs->css.cgroup) ||
> + cs->attach_in_progress;
> +}
> +
> /**
> * cpuset_for_each_child - traverse online children of a cpuset
> * @child_cs: loop cursor pointing to the current child
> diff --git a/kernel/cgroup/cpuset-v1.c b/kernel/cgroup/cpuset-v1.c
> index 04124c38a774..7a23b9e8778f 100644
> --- a/kernel/cgroup/cpuset-v1.c
> +++ b/kernel/cgroup/cpuset-v1.c
> @@ -368,6 +368,20 @@ int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial)
> if (par && !is_cpuset_subset(trial, par))
> goto out;
>
> + /*
> + * Cpusets with tasks - existing or newly being attached - can't
> + * be changed to have empty cpus_allowed or mems_allowed.
> + */
> + ret = -ENOSPC;
> + if (cpuset_is_populated(cur)) {
> + if (!cpumask_empty(cur->cpus_allowed) &&
> + cpumask_empty(trial->cpus_allowed))
> + goto out;
> + if (!nodes_empty(cur->mems_allowed) &&
> + nodes_empty(trial->mems_allowed))
> + goto out;
> + }
> +
> ret = 0;
> out:
> return ret;
The current implementation is sufficient.
However, I suggest we fully separate the validation logic for v1 and v2. While this may introduce
some code duplication (likely minimal), it would allow us to modify the validate_change logic for v2
in the future without needing to consider v1 compatibility. Given that v1 is unlikely to see further
changes, this separation would be a practical long-term decision.
@@ -368,6 +368,48 @@ int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial)
if (par && !is_cpuset_subset(trial, par))
goto out;
+ /*
+ * Cpusets with tasks - existing or newly being attached - can't
+ * be changed to have empty cpus_allowed or mems_allowed.
+ */
+ ret = -ENOSPC;
+ if (cpuset_is_populated(cur)) {
+ if (!cpumask_empty(cur->cpus_allowed) &&
+ cpumask_empty(trial->cpus_allowed))
+ goto out;
+ if (!nodes_empty(cur->mems_allowed) &&
+ nodes_empty(trial->mems_allowed))
+ goto out;
+ }
+
+ /*
+ * We can't shrink if we won't have enough room for SCHED_DEADLINE
+ * tasks. This check is not done when scheduling is disabled as the
+ * users should know what they are doing.
+ *
+ * For v1, effective_cpus == cpus_allowed & user_xcpus() returns
+ * cpus_allowed.
+ *
+ */
+ ret = -EBUSY;
+ if (is_cpu_exclusive(cur) && is_sched_load_balance(cur) &&
+ !cpuset_cpumask_can_shrink(cur->effective_cpus, user_xcpus(trial)))
+ goto out;
+
+ /*
+ * If either I or some sibling (!= me) is exclusive, we can't
+ * overlap. exclusive_cpus cannot overlap with each other if set.
+ */
+ ret = -EINVAL;
+ cpuset_for_each_child(c, css, par) {
+ if (c == cur)
+ continue;
+ if (cpuset1_cpus_excl_conflict(trial, c))
+ goto out;
+ if (mems_excl_conflict(trial, c))
+ goto out;
+ }
+
ret = 0;
out:
return ret;
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index 83fb83a86b4b..a3dbca125588 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -370,15 +370,6 @@ static inline bool is_in_v2_mode(void)
> (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
> }
>
> -static inline bool cpuset_is_populated(struct cpuset *cs)
> -{
> - lockdep_assert_held(&cpuset_mutex);
> -
> - /* Cpusets in the process of attaching should be considered as populated */
> - return cgroup_is_populated(cs->css.cgroup) ||
> - cs->attach_in_progress;
> -}
> -
> /**
> * partition_is_populated - check if partition has tasks
> * @cs: partition root to be checked
> @@ -695,20 +686,6 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
>
> par = parent_cs(cur);
>
> - /*
> - * Cpusets with tasks - existing or newly being attached - can't
> - * be changed to have empty cpus_allowed or mems_allowed.
> - */
> - ret = -ENOSPC;
> - if (cpuset_is_populated(cur)) {
> - if (!cpumask_empty(cur->cpus_allowed) &&
> - cpumask_empty(trial->cpus_allowed))
> - goto out;
> - if (!nodes_empty(cur->mems_allowed) &&
> - nodes_empty(trial->mems_allowed))
> - goto out;
> - }
> -
> /*
> * We can't shrink if we won't have enough room for SCHED_DEADLINE
> * tasks. This check is not done when scheduling is disabled as the
> diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
> index ff4540b0490e..5dff3ad53867 100755
> --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
> +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
> @@ -425,6 +425,9 @@ TEST_MATRIX=(
> # cpuset.cpus can be set to a subset of sibling's cpuset.cpus.exclusive
> " C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2"
>
> + # cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs
> + " C1-3:S+ C2 . . . T:C . . 0 A1:1-3|A2:1-3"
> +
> # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
> # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
> # Failure cases:
--
Best regards,
Ridong
Powered by blists - more mailing lists