[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230728164254.27562-22-james.morse@arm.com>
Date: Fri, 28 Jul 2023 16:42:51 +0000
From: James Morse <james.morse@....com>
To: x86@...nel.org, linux-kernel@...r.kernel.org
Cc: Fenghua Yu <fenghua.yu@...el.com>,
Reinette Chatre <reinette.chatre@...el.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
H Peter Anvin <hpa@...or.com>,
Babu Moger <Babu.Moger@....com>,
James Morse <james.morse@....com>,
shameerali.kolothum.thodi@...wei.com,
D Scott Phillips OS <scott@...amperecomputing.com>,
carl@...amperecomputing.com, lcherian@...vell.com,
bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
xingxin.hx@...nanolis.org, baolin.wang@...ux.alibaba.com,
Jamie Iles <quic_jiles@...cinc.com>,
Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
dfustini@...libre.com
Subject: [PATCH v5 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
When a CPU is taken offline resctrl may need to move the overflow or
limbo handlers to run on a different CPU.
Once the offline callbacks have been split, cqm_setup_limbo_handler()
will be called while the CPU that is going offline is still present
in the cpu_mask.
Pass the CPU to exclude to cqm_setup_limbo_handler() and
mbm_setup_overflow_handler(). These functions can use a variant of
cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
need excluding.
A subsequent patch moves these calls to be before CPUs have been removed,
so this exclude_cpus behaviour is temporary.
Tested-by: Shaopeng Tan <tan.shaopeng@...itsu.com>
Signed-off-by: James Morse <james.morse@....com>
---
Changes since v2:
* Rephrased a comment to avoid a two letter bad-word. (we)
* Avoid assigning mbm_work_cpu if the domain is going to be free()d
* Added cpumask_any_housekeeping_but(), I dislike the name
Changes since v3:
* Marked an explanatory comment as temporary as the subsequent patch is
no longer adjacent.
Changes since v4:
* Check against RESCTRL_PICK_ANY_CPU instead of -1.
* Leave cqm_work_cpu as nr_cpu_ids when no CPU is available.
* Made cpumask_any_housekeeping_but() more readable.
---
arch/x86/kernel/cpu/resctrl/core.c | 8 +++--
arch/x86/kernel/cpu/resctrl/internal.h | 36 ++++++++++++++++++++--
arch/x86/kernel/cpu/resctrl/monitor.c | 42 +++++++++++++++++++++-----
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++--
include/linux/resctrl.h | 2 ++
5 files changed, 81 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index a694563d3929..d39572a0a3cd 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -582,12 +582,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
- mbm_setup_overflow_handler(d, 0);
+ /*
+ * temporary: exclude_cpu=-1 as this CPU has already
+ * been removed by cpumask_clear_cpu()d
+ */
+ mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
}
if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
has_busy_rmid(d)) {
cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0);
+ cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
}
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f99e0a1f39c8..655418c23c0e 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -79,6 +79,36 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
return cpu;
}
+/**
+ * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring those
+ * that aren't marked nohz_full, excluding
+ * the provided CPU
+ * @mask: The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
+ *
+ * Returns a CPU from @mask, but not @exclude_cpus. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred.
+ * Returns >= nr_cpu_ids if no CPUs are available.
+ */
+static inline unsigned int
+cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
+{
+ unsigned int cpu, hk_cpu;
+
+ cpu = cpumask_any_but(mask, exclude_cpu);
+ if (!tick_nohz_full_cpu(cpu))
+ return cpu;
+
+ hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+ if (hk_cpu == exclude_cpu)
+ hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
+
+ if (hk_cpu < nr_cpu_ids)
+ cpu = hk_cpu;
+
+ return cpu;
+}
+
struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
@@ -564,11 +594,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
- unsigned long delay_ms);
+ unsigned long delay_ms,
+ int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c0b1ad8d8f6d..471cdc4e4eae 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -493,7 +493,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
* setup up the limbo worker.
*/
if (!has_busy_rmid(d))
- cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+ cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
@@ -816,15 +817,28 @@ void cqm_handle_limbo(struct work_struct *work)
mutex_unlock(&rdtgroup_mutex);
}
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ * domain.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+ if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+ else
+ cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
+ exclude_cpu);
dom->cqm_work_cpu = cpu;
- schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}
void mbm_handle_overflow(struct work_struct *work)
@@ -870,7 +884,15 @@ void mbm_handle_overflow(struct work_struct *work)
mutex_unlock(&rdtgroup_mutex);
}
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ * domain.
+ * @delay_ms: How far in the future the handler should run.
+ * @exclude_cpu: Which CPU the handler should not run on,
+ * RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+ int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
@@ -881,9 +903,15 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
*/
if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+ if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+ else
+ cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
+ exclude_cpu);
dom->mbm_work_cpu = cpu;
- schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+
+ if (cpu < nr_cpu_ids)
+ schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}
static int dom_data_init(struct rdt_resource *r)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 7bd3a3dc0f44..dac7ed7ac71a 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2552,7 +2552,8 @@ static int rdt_get_tree(struct fs_context *fc)
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->domains, list)
- mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
goto out;
@@ -3850,7 +3851,8 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
- mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
if (is_llc_occupancy_enabled())
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 35d3c97df212..56b4645940a7 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -10,6 +10,8 @@
#define RESCTRL_RESERVED_CLOSID 0
#define RESCTRL_RESERVED_RMID 0
+#define RESCTRL_PICK_ANY_CPU -1
+
#ifdef CONFIG_PROC_CPU_RESCTRL
int proc_resctrl_show(struct seq_file *m,
--
2.39.2
Powered by blists - more mailing lists