[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221129111055.953833-3-peternewman@google.com>
Date: Tue, 29 Nov 2022 12:10:55 +0100
From: Peter Newman <peternewman@...gle.com>
To: reinette.chatre@...el.com, fenghua.yu@...el.com
Cc: bp@...en8.de, derkling@...gle.com, eranian@...gle.com,
hpa@...or.com, james.morse@....com, jannh@...gle.com,
kpsingh@...gle.com, linux-kernel@...r.kernel.org, mingo@...hat.com,
tglx@...utronix.de, x86@...nel.org,
Peter Newman <peternewman@...gle.com>
Subject: [PATCH v4 2/2] x86/resctrl: IPI all online CPUs for group updates
Removing a CTRL_MON or MON group directory moves all tasks to the parent
group. The rmdir implementation therefore interrupts any running
tasks which were in the deleted group to update their CLOSID/RMID to
those of the parent.
The rmdir operation iterates over all tasks in the deleted group while
read-locking the tasklist_lock to ensure that no newly-created child
tasks remain in the deleted group. Calling task_call_func() to perform
the updates on every task in the deleted group, similar to the recent
fix in __rdtgroup_move_task(), would result in a much longer
tasklist_lock critical section.
To avoid this, stop attempting to construct a precise mask of CPUs
hosting the moved tasks in rdt_move_group_tasks(). Its callers instead
perform the PQR_ASSOC MSR update on all online CPUs to ensure all
affected tasks are notified.
To measure the impact of the rdt_move_group_tasks() implementation
options, the following command was run in an rdtgroup to produce a
1600-task workload:
# mkdir /sys/fs/resctrl/test
# echo $$ > /sys/fs/resctrl/test/tasks
# perf bench sched messaging -g 40 -l 100000
Results collected using:
# perf stat rmdir /sys/fs/resctrl/test
CPU: Intel(R) Xeon(R) Platinum P-8136 CPU @ 2.00GHz (112 threads)
Calling task_call_func() on all tasks in the deleted group increased
task-clock time from the 1.54 ms baseline to 2.35 ms, while the IPI
broadcast reduced it to 1.31 ms.
Restructuring resctrl groups is assumed to be a rare act of system-level
reconfiguration by the user, so the impact of additional IPIs resulting
from this change to a CPU-isolated workload is not a concern.
Signed-off-by: Peter Newman <peternewman@...gle.com>
Reviewed-by: James Morse <james.morse@....com>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 52 +++++++-------------------
1 file changed, 13 insertions(+), 39 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 59b7ffcd53bb..4a3c0b315484 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2401,12 +2401,10 @@ static int reset_all_ctrls(struct rdt_resource *r)
* Move tasks from one to the other group. If @from is NULL, then all tasks
* in the systems are moved unconditionally (used for teardown).
*
- * If @mask is not NULL the cpus on which moved tasks are running are set
- * in that mask so the update smp function call is restricted to affected
- * cpus.
+ * Following this operation, the caller should update PQR_ASSOC MSR and per-CPU
+ * storage on all online CPUs.
*/
-static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
- struct cpumask *mask)
+static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to)
{
struct task_struct *p, *t;
@@ -2416,16 +2414,6 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
is_rmid_match(t, from)) {
WRITE_ONCE(t->closid, to->closid);
WRITE_ONCE(t->rmid, to->mon.rmid);
-
- /*
- * If the task is on a CPU, set the CPU in the mask.
- * The detection is inaccurate as tasks might move or
- * schedule before the smp function call takes place.
- * In such a case the function call is pointless, but
- * there is no other side effect.
- */
- if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
- cpumask_set_cpu(task_cpu(t), mask);
}
}
read_unlock(&tasklist_lock);
@@ -2456,7 +2444,7 @@ static void rmdir_all_sub(void)
struct rdtgroup *rdtgrp, *tmp;
/* Move all tasks to the default resource group */
- rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
+ rdt_move_group_tasks(NULL, &rdtgroup_default);
list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
/* Free any child rmids */
@@ -3115,23 +3103,19 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
return -EPERM;
}
-static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
+static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp)
{
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
int cpu;
/* Give any tasks back to the parent group */
- rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
+ rdt_move_group_tasks(rdtgrp, prdtgrp);
/* Update per cpu rmid of the moved CPUs first */
for_each_cpu(cpu, &rdtgrp->cpu_mask)
per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
- /*
- * Update the MSR on moved CPUs and CPUs which have moved
- * task running on them.
- */
- cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- update_closid_rmid(tmpmask, NULL);
+
+ update_closid_rmid(cpu_online_mask, NULL);
rdtgrp->flags = RDT_DELETED;
free_rmid(rdtgrp->mon.rmid);
@@ -3156,12 +3140,12 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
return 0;
}
-static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
+static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp)
{
int cpu;
/* Give any tasks back to the default group */
- rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
+ rdt_move_group_tasks(rdtgrp, &rdtgroup_default);
/* Give any CPUs back to the default group */
cpumask_or(&rdtgroup_default.cpu_mask,
@@ -3173,12 +3157,7 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
}
- /*
- * Update the MSR on moved CPUs and CPUs which have moved
- * task running on them.
- */
- cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- update_closid_rmid(tmpmask, NULL);
+ update_closid_rmid(cpu_online_mask, NULL);
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
@@ -3197,12 +3176,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
{
struct kernfs_node *parent_kn = kn->parent;
struct rdtgroup *rdtgrp;
- cpumask_var_t tmpmask;
int ret = 0;
- if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
- return -ENOMEM;
-
rdtgrp = rdtgroup_kn_lock_live(kn);
if (!rdtgrp) {
ret = -EPERM;
@@ -3222,18 +3197,17 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = rdtgroup_ctrl_remove(rdtgrp);
} else {
- ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
+ ret = rdtgroup_rmdir_ctrl(rdtgrp);
}
} else if (rdtgrp->type == RDTMON_GROUP &&
is_mon_groups(parent_kn, kn->name)) {
- ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
+ ret = rdtgroup_rmdir_mon(rdtgrp);
} else {
ret = -EPERM;
}
out:
rdtgroup_kn_unlock(kn);
- free_cpumask_var(tmpmask);
return ret;
}
--
2.38.1.584.g0f3c55d4c2-goog
Powered by blists - more mailing lists